Mercurial > hg > CbC > CbC_gcc
diff gcc/config/i386/i386.md @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 1b10fe6932e1 04ced10e8804 |
line wrap: on
line diff
--- a/gcc/config/i386/i386.md Tue May 25 18:58:51 2010 +0900 +++ b/gcc/config/i386/i386.md Tue Mar 22 17:18:12 2011 +0900 @@ -1,6 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -59,201 +59,218 @@ ;; Y -- print condition for XOP pcom* instruction. ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). +;; @ -- print a segment register of thread base pointer load ;; UNSPEC usage: -(define_constants - [; Relocation specifiers - (UNSPEC_GOT 0) - (UNSPEC_GOTOFF 1) - (UNSPEC_GOTPCREL 2) - (UNSPEC_GOTTPOFF 3) - (UNSPEC_TPOFF 4) - (UNSPEC_NTPOFF 5) - (UNSPEC_DTPOFF 6) - (UNSPEC_GOTNTPOFF 7) - (UNSPEC_INDNTPOFF 8) - (UNSPEC_PLTOFF 9) - (UNSPEC_MACHOPIC_OFFSET 10) - - ; Prologue support - (UNSPEC_STACK_ALLOC 11) - (UNSPEC_SET_GOT 12) - (UNSPEC_SSE_PROLOGUE_SAVE 13) - (UNSPEC_REG_SAVE 14) - (UNSPEC_DEF_CFA 15) - (UNSPEC_SET_RIP 16) - (UNSPEC_SET_GOT_OFFSET 17) - (UNSPEC_MEMORY_BLOCKAGE 18) - (UNSPEC_SSE_PROLOGUE_SAVE_LOW 19) - - ; TLS support - (UNSPEC_TP 20) - (UNSPEC_TLS_GD 21) - (UNSPEC_TLS_LD_BASE 22) - (UNSPEC_TLSDESC 23) - - ; Other random patterns - (UNSPEC_SCAS 30) - (UNSPEC_FNSTSW 31) - (UNSPEC_SAHF 32) - (UNSPEC_FSTCW 33) - (UNSPEC_ADD_CARRY 34) - (UNSPEC_FLDCW 35) - (UNSPEC_REP 36) - (UNSPEC_LD_MPIC 38) ; load_macho_picbase - (UNSPEC_TRUNC_NOOP 39) - - ; For SSE/MMX support: - (UNSPEC_FIX_NOTRUNC 40) - (UNSPEC_MASKMOV 41) - (UNSPEC_MOVMSK 42) - (UNSPEC_MOVNT 43) - (UNSPEC_MOVU 44) - (UNSPEC_RCP 45) - (UNSPEC_RSQRT 46) - (UNSPEC_SFENCE 47) - (UNSPEC_PFRCP 49) - (UNSPEC_PFRCPIT1 40) - (UNSPEC_PFRCPIT2 41) - (UNSPEC_PFRSQRT 42) - (UNSPEC_PFRSQIT1 43) - (UNSPEC_MFENCE 44) - (UNSPEC_LFENCE 45) - 
(UNSPEC_PSADBW 46) - (UNSPEC_LDDQU 47) - (UNSPEC_MS_TO_SYSV_CALL 48) - - ; Generic math support - (UNSPEC_COPYSIGN 50) - (UNSPEC_IEEE_MIN 51) ; not commutative - (UNSPEC_IEEE_MAX 52) ; not commutative - - ; x87 Floating point - (UNSPEC_SIN 60) - (UNSPEC_COS 61) - (UNSPEC_FPATAN 62) - (UNSPEC_FYL2X 63) - (UNSPEC_FYL2XP1 64) - (UNSPEC_FRNDINT 65) - (UNSPEC_FIST 66) - (UNSPEC_F2XM1 67) - (UNSPEC_TAN 68) - (UNSPEC_FXAM 69) - - ; x87 Rounding - (UNSPEC_FRNDINT_FLOOR 70) - (UNSPEC_FRNDINT_CEIL 71) - (UNSPEC_FRNDINT_TRUNC 72) - (UNSPEC_FRNDINT_MASK_PM 73) - (UNSPEC_FIST_FLOOR 74) - (UNSPEC_FIST_CEIL 75) - - ; x87 Double output FP - (UNSPEC_SINCOS_COS 80) - (UNSPEC_SINCOS_SIN 81) - (UNSPEC_XTRACT_FRACT 84) - (UNSPEC_XTRACT_EXP 85) - (UNSPEC_FSCALE_FRACT 86) - (UNSPEC_FSCALE_EXP 87) - (UNSPEC_FPREM_F 88) - (UNSPEC_FPREM_U 89) - (UNSPEC_FPREM1_F 90) - (UNSPEC_FPREM1_U 91) - - (UNSPEC_C2_FLAG 95) - (UNSPEC_FXAM_MEM 96) - - ; SSP patterns - (UNSPEC_SP_SET 100) - (UNSPEC_SP_TEST 101) - (UNSPEC_SP_TLS_SET 102) - (UNSPEC_SP_TLS_TEST 103) - - ; SSSE3 - (UNSPEC_PSHUFB 120) - (UNSPEC_PSIGN 121) - (UNSPEC_PALIGNR 122) - - ; For SSE4A support - (UNSPEC_EXTRQI 130) - (UNSPEC_EXTRQ 131) - (UNSPEC_INSERTQI 132) - (UNSPEC_INSERTQ 133) - - ; For SSE4.1 support - (UNSPEC_BLENDV 134) - (UNSPEC_INSERTPS 135) - (UNSPEC_DP 136) - (UNSPEC_MOVNTDQA 137) - (UNSPEC_MPSADBW 138) - (UNSPEC_PHMINPOSUW 139) - (UNSPEC_PTEST 140) - (UNSPEC_ROUND 141) - - ; For SSE4.2 support - (UNSPEC_CRC32 143) - (UNSPEC_PCMPESTR 144) - (UNSPEC_PCMPISTR 145) - - ; For FMA4 support - (UNSPEC_FMA4_INTRINSIC 150) - (UNSPEC_FMA4_FMADDSUB 151) - (UNSPEC_FMA4_FMSUBADD 152) - (UNSPEC_XOP_UNSIGNED_CMP 151) - (UNSPEC_XOP_TRUEFALSE 152) - (UNSPEC_XOP_PERMUTE 153) - (UNSPEC_FRCZ 154) - - ; For AES support - (UNSPEC_AESENC 159) - (UNSPEC_AESENCLAST 160) - (UNSPEC_AESDEC 161) - (UNSPEC_AESDECLAST 162) - (UNSPEC_AESIMC 163) - (UNSPEC_AESKEYGENASSIST 164) - - ; For PCLMUL support - (UNSPEC_PCLMUL 165) - - ; For AVX support - 
(UNSPEC_PCMP 166) - (UNSPEC_VPERMIL 167) - (UNSPEC_VPERMIL2 168) - (UNSPEC_VPERMIL2F128 169) - (UNSPEC_MASKLOAD 170) - (UNSPEC_MASKSTORE 171) - (UNSPEC_CAST 172) - (UNSPEC_VTESTP 173) - ]) - -(define_constants - [(UNSPECV_BLOCKAGE 0) - (UNSPECV_STACK_PROBE 1) - (UNSPECV_EMMS 2) - (UNSPECV_LDMXCSR 3) - (UNSPECV_STMXCSR 4) - (UNSPECV_FEMMS 5) - (UNSPECV_CLFLUSH 6) - (UNSPECV_ALIGN 7) - (UNSPECV_MONITOR 8) - (UNSPECV_MWAIT 9) - (UNSPECV_CMPXCHG 10) - (UNSPECV_XCHG 12) - (UNSPECV_LOCK 13) - (UNSPECV_PROLOGUE_USE 14) - (UNSPECV_CLD 15) - (UNSPECV_VZEROALL 16) - (UNSPECV_VZEROUPPER 17) - (UNSPECV_RDTSC 18) - (UNSPECV_RDTSCP 19) - (UNSPECV_RDPMC 20) - (UNSPECV_VSWAPMOV 21) - (UNSPECV_LLWP_INTRINSIC 22) - (UNSPECV_SLWP_INTRINSIC 23) - (UNSPECV_LWPVAL_INTRINSIC 24) - (UNSPECV_LWPINS_INTRINSIC 25) - ]) +(define_c_enum "unspec" [ + ;; Relocation specifiers + UNSPEC_GOT + UNSPEC_GOTOFF + UNSPEC_GOTPCREL + UNSPEC_GOTTPOFF + UNSPEC_TPOFF + UNSPEC_NTPOFF + UNSPEC_DTPOFF + UNSPEC_GOTNTPOFF + UNSPEC_INDNTPOFF + UNSPEC_PLTOFF + UNSPEC_MACHOPIC_OFFSET + UNSPEC_PCREL + + ;; Prologue support + UNSPEC_STACK_ALLOC + UNSPEC_SET_GOT + UNSPEC_REG_SAVE + UNSPEC_DEF_CFA + UNSPEC_SET_RIP + UNSPEC_SET_GOT_OFFSET + UNSPEC_MEMORY_BLOCKAGE + UNSPEC_STACK_CHECK + + ;; TLS support + UNSPEC_TP + UNSPEC_TLS_GD + UNSPEC_TLS_LD_BASE + UNSPEC_TLSDESC + UNSPEC_TLS_IE_SUN + + ;; Other random patterns + UNSPEC_SCAS + UNSPEC_FNSTSW + UNSPEC_SAHF + UNSPEC_PARITY + UNSPEC_FSTCW + UNSPEC_ADD_CARRY + UNSPEC_FLDCW + UNSPEC_REP + UNSPEC_LD_MPIC ; load_macho_picbase + UNSPEC_TRUNC_NOOP + UNSPEC_DIV_ALREADY_SPLIT + UNSPEC_CALL_NEEDS_VZEROUPPER + + ;; For SSE/MMX support: + UNSPEC_FIX_NOTRUNC + UNSPEC_MASKMOV + UNSPEC_MOVMSK + UNSPEC_MOVNT + UNSPEC_MOVU + UNSPEC_RCP + UNSPEC_RSQRT + UNSPEC_SFENCE + UNSPEC_PFRCP + UNSPEC_PFRCPIT1 + UNSPEC_PFRCPIT2 + UNSPEC_PFRSQRT + UNSPEC_PFRSQIT1 + UNSPEC_MFENCE + UNSPEC_LFENCE + UNSPEC_PSADBW + UNSPEC_LDDQU + UNSPEC_MS_TO_SYSV_CALL + + ;; Generic math support + UNSPEC_COPYSIGN + 
UNSPEC_IEEE_MIN ; not commutative + UNSPEC_IEEE_MAX ; not commutative + + ;; x87 Floating point + UNSPEC_SIN + UNSPEC_COS + UNSPEC_FPATAN + UNSPEC_FYL2X + UNSPEC_FYL2XP1 + UNSPEC_FRNDINT + UNSPEC_FIST + UNSPEC_F2XM1 + UNSPEC_TAN + UNSPEC_FXAM + + ;; x87 Rounding + UNSPEC_FRNDINT_FLOOR + UNSPEC_FRNDINT_CEIL + UNSPEC_FRNDINT_TRUNC + UNSPEC_FRNDINT_MASK_PM + UNSPEC_FIST_FLOOR + UNSPEC_FIST_CEIL + + ;; x87 Double output FP + UNSPEC_SINCOS_COS + UNSPEC_SINCOS_SIN + UNSPEC_XTRACT_FRACT + UNSPEC_XTRACT_EXP + UNSPEC_FSCALE_FRACT + UNSPEC_FSCALE_EXP + UNSPEC_FPREM_F + UNSPEC_FPREM_U + UNSPEC_FPREM1_F + UNSPEC_FPREM1_U + + UNSPEC_C2_FLAG + UNSPEC_FXAM_MEM + + ;; SSP patterns + UNSPEC_SP_SET + UNSPEC_SP_TEST + UNSPEC_SP_TLS_SET + UNSPEC_SP_TLS_TEST + + ;; SSSE3 + UNSPEC_PSHUFB + UNSPEC_PSIGN + UNSPEC_PALIGNR + + ;; For SSE4A support + UNSPEC_EXTRQI + UNSPEC_EXTRQ + UNSPEC_INSERTQI + UNSPEC_INSERTQ + + ;; For SSE4.1 support + UNSPEC_BLENDV + UNSPEC_INSERTPS + UNSPEC_DP + UNSPEC_MOVNTDQA + UNSPEC_MPSADBW + UNSPEC_PHMINPOSUW + UNSPEC_PTEST + UNSPEC_ROUND + + ;; For SSE4.2 support + UNSPEC_CRC32 + UNSPEC_PCMPESTR + UNSPEC_PCMPISTR + + ;; For FMA4 support + UNSPEC_FMADDSUB + UNSPEC_XOP_UNSIGNED_CMP + UNSPEC_XOP_TRUEFALSE + UNSPEC_XOP_PERMUTE + UNSPEC_FRCZ + + ;; For AES support + UNSPEC_AESENC + UNSPEC_AESENCLAST + UNSPEC_AESDEC + UNSPEC_AESDECLAST + UNSPEC_AESIMC + UNSPEC_AESKEYGENASSIST + + ;; For PCLMUL support + UNSPEC_PCLMUL + + ;; For AVX support + UNSPEC_PCMP + UNSPEC_VPERMIL + UNSPEC_VPERMIL2 + UNSPEC_VPERMIL2F128 + UNSPEC_MASKLOAD + UNSPEC_MASKSTORE + UNSPEC_CAST + UNSPEC_VTESTP + UNSPEC_VCVTPH2PS + UNSPEC_VCVTPS2PH + + ;; For BMI support + UNSPEC_BEXTR + + ;; For RDRAND support + UNSPEC_RDRAND +]) + +(define_c_enum "unspecv" [ + UNSPECV_BLOCKAGE + UNSPECV_STACK_PROBE + UNSPECV_PROBE_STACK_RANGE + UNSPECV_EMMS + UNSPECV_LDMXCSR + UNSPECV_STMXCSR + UNSPECV_FEMMS + UNSPECV_CLFLUSH + UNSPECV_ALIGN + UNSPECV_MONITOR + UNSPECV_MWAIT + UNSPECV_CMPXCHG + UNSPECV_XCHG + 
UNSPECV_LOCK + UNSPECV_PROLOGUE_USE + UNSPECV_CLD + UNSPECV_NOPS + UNSPECV_VZEROALL + UNSPECV_VZEROUPPER + UNSPECV_RDTSC + UNSPECV_RDTSCP + UNSPECV_RDPMC + UNSPECV_LLWP_INTRINSIC + UNSPECV_SLWP_INTRINSIC + UNSPECV_LWPVAL_INTRINSIC + UNSPECV_LWPINS_INTRINSIC + UNSPECV_RDFSBASE + UNSPECV_RDGSBASE + UNSPECV_WRFSBASE + UNSPECV_WRGSBASE + UNSPECV_SPLIT_STACK_RETURN +]) ;; Constants to represent pcomtrue/pcomfalse variants (define_constants @@ -342,8 +359,8 @@ ;; Processor type. -(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom, - generic64,amdfam10,bdver1" +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, + atom,generic64,amdfam10,bdver1,btver1" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. Refinements due to arguments to be @@ -356,7 +373,7 @@ push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, - sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" @@ -371,7 +388,7 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown" (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") - (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") (const_string "sse") @@ -452,7 +469,7 @@ ;; Set when REX opcode prefix is used. 
(define_attr "prefix_rex" "" - (cond [(ne (symbol_ref "!TARGET_64BIT") (const_int 0)) + (cond [(eq (symbol_ref "TARGET_64BIT") (const_int 0)) (const_int 0) (and (eq_attr "mode" "DI") (and (eq_attr "type" "!push,pop,call,callv,leave,ibr") @@ -677,13 +694,6 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) -;; All integer comparison codes. -(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu]) - -;; All floating-point comparison codes. -(define_code_iterator fp_cond [unordered ordered - uneq unge ungt unle unlt ltgt]) - (define_code_iterator plusminus [plus minus]) (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) @@ -710,9 +720,6 @@ ;; Mapping of unsigned max and min (define_code_iterator umaxmin [umax umin]) -;; Mapping of signed/unsigned max and min -(define_code_iterator maxmin [smax smin umax umin]) - ;; Base name for integer and FP insn mnemonic (define_code_attr maxmin_int [(smax "maxs") (smin "mins") (umax "maxu") (umin "minu")]) @@ -764,12 +771,24 @@ (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "") (div "i") (udiv "")]) -;; All single word integer modes. +;; 64bit single word integer modes. +(define_mode_iterator SWI1248x [QI HI SI DI]) + +;; 64bit single word integer modes without QImode and HImode. +(define_mode_iterator SWI48x [SI DI]) + +;; Single word integer modes. (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) +;; Single word integer modes without SImode and DImode. +(define_mode_iterator SWI12 [QI HI]) + ;; Single word integer modes without DImode. (define_mode_iterator SWI124 [QI HI SI]) +;; Single word integer modes without QImode and DImode. +(define_mode_iterator SWI24 [HI SI]) + ;; Single word integer modes without QImode. (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")]) @@ -810,6 +829,12 @@ ;; Instruction suffix for integer modes. 
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) +;; Pointer size prefix for integer modes (Intel asm dialect) +(define_mode_attr iptrsize [(QI "BYTE") + (HI "WORD") + (SI "DWORD") + (DI "QWORD")]) + ;; Register class for integer modes. (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) @@ -840,6 +865,20 @@ (SI "general_operand") (DI "x86_64_szext_general_operand")]) +;; Immediate operand predicate for integer modes. +(define_mode_attr immediate_operand + [(QI "immediate_operand") + (HI "immediate_operand") + (SI "immediate_operand") + (DI "x86_64_immediate_operand")]) + +;; Nonmemory operand predicate for integer modes. +(define_mode_attr nonmemory_operand + [(QI "nonmemory_operand") + (HI "nonmemory_operand") + (SI "nonmemory_operand") + (DI "x86_64_nonmemory_operand")]) + ;; Operand predicate for shifts. (define_mode_attr shift_operand [(QI "nonimmediate_operand") @@ -898,8 +937,10 @@ (include "ppro.md") (include "k6.md") (include "athlon.md") +(include "bdver1.md") (include "geode.md") (include "atom.md") +(include "core2.md") ;; Operand and operator predicates and constraints @@ -915,7 +956,7 @@ (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand" "") (match_operand:SDWIM 2 "<general_operand>" ""))) (set (pc) (if_then_else - (match_operator 0 "comparison_operator" + (match_operator 0 "ordered_comparison_operator" [(reg:CC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 3 "" "")) (pc)))] @@ -923,9 +964,8 @@ { if (MEM_P (operands[1]) && MEM_P (operands[2])) operands[1] = force_reg (<MODE>mode, operands[1]); - ix86_compare_op0 = operands[1]; - ix86_compare_op1 = operands[2]; - ix86_expand_branch (GET_CODE (operands[0]), operands[3]); + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); DONE; }) @@ -934,24 +974,21 @@ (compare:CC (match_operand:SWIM 2 "nonimmediate_operand" "") (match_operand:SWIM 3 "<general_operand>" ""))) (set (match_operand:QI 0 "register_operand" "") - (match_operator 1 
"comparison_operator" + (match_operator 1 "ordered_comparison_operator" [(reg:CC FLAGS_REG) (const_int 0)]))] "" { if (MEM_P (operands[2]) && MEM_P (operands[3])) operands[2] = force_reg (<MODE>mode, operands[2]); - ix86_compare_op0 = operands[2]; - ix86_compare_op1 = operands[3]; - ix86_expand_setcc (GET_CODE (operands[1]), operands[0]); + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); DONE; }) (define_expand "cmp<mode>_1" [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:SWI48 0 "nonimmediate_operand" "") - (match_operand:SWI48 1 "<general_operand>" "")))] - "" - "") + (match_operand:SWI48 1 "<general_operand>" "")))]) (define_insn "*cmp<mode>_ccno_1" [(set (reg FLAGS_REG) @@ -1036,9 +1073,7 @@ (match_operand 0 "ext_register_operand" "") (const_int 8) (const_int 8)) 0) - (match_operand:QI 1 "immediate_operand" "")))] - "" - "") + (match_operand:QI 1 "immediate_operand" "")))]) (define_insn "*cmpqi_ext_3_insn" [(set (reg FLAGS_REG) @@ -1105,9 +1140,8 @@ (pc)))] "TARGET_80387" { - ix86_compare_op0 = operands[1]; - ix86_compare_op1 = operands[2]; - ix86_expand_branch (GET_CODE (operands[0]), operands[3]); + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); DONE; }) @@ -1121,9 +1155,8 @@ (const_int 0)]))] "TARGET_80387" { - ix86_compare_op0 = operands[2]; - ix86_compare_op1 = operands[3]; - ix86_expand_setcc (GET_CODE (operands[1]), operands[0]); + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); DONE; }) @@ -1139,9 +1172,8 @@ (pc)))] "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { - ix86_compare_op0 = operands[1]; - ix86_compare_op1 = operands[2]; - ix86_expand_branch (GET_CODE (operands[0]), operands[3]); + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); DONE; }) @@ -1155,9 +1187,8 @@ (const_int 0)]))] "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { - ix86_compare_op0 = 
operands[2]; - ix86_compare_op1 = operands[3]; - ix86_expand_setcc (GET_CODE (operands[1]), operands[0]); + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); DONE; }) @@ -1170,9 +1201,8 @@ (pc)))] "" { - ix86_compare_op0 = operands[1]; - ix86_compare_op1 = operands[2]; - ix86_expand_branch (GET_CODE (operands[0]), operands[3]); + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); DONE; }) @@ -1183,9 +1213,8 @@ (match_operand 3 "const0_operand" "")]))] "" { - ix86_compare_op0 = operands[2]; - ix86_compare_op1 = operands[3]; - ix86_expand_setcc (GET_CODE (operands[1]), operands[0]); + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); DONE; }) @@ -1438,6 +1467,7 @@ [(set_attr "length" "1") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "SI")]) ;; Pentium Pro can do steps 1 through 3 in one go. @@ -1468,7 +1498,8 @@ ] (const_string "0"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) (define_insn "*cmpfp_i_sse" [(set (reg:CCFP FLAGS_REG) @@ -1490,7 +1521,8 @@ (const_string "1") (const_string "0"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) (define_insn "*cmpfp_i_i387" [(set (reg:CCFP FLAGS_REG) @@ -1510,7 +1542,8 @@ ] (const_string "XF"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) (define_insn "*cmpfp_iu_mixed" [(set (reg:CCFPU FLAGS_REG) @@ -1538,7 +1571,8 @@ ] (const_string "0"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) 
(define_insn "*cmpfp_iu_sse" [(set (reg:CCFPU FLAGS_REG) @@ -1560,7 +1594,8 @@ (const_string "1") (const_string "0"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) (define_insn "*cmpfp_iu_387" [(set (reg:CCFPU FLAGS_REG) @@ -1580,773 +1615,25 @@ ] (const_string "XF"))) (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) -;; Move instructions. - -;; General case of fullword move. - -(define_expand "movsi" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SI 1 "general_operand" ""))] - "" - "ix86_expand_move (SImode, operands); DONE;") - -;; Push/pop instructions. They are separate since autoinc/dec is not a -;; general_operand. -;; -;; %%% We don't use a post-inc memory reference because x86 is not a -;; general AUTO_INC_DEC host, which impacts how it is treated in flow. -;; Changing this impacts compiler performance on other non-AUTO_INC_DEC -;; targets without our curiosities, and it is just as easy to represent -;; this differently. - -(define_insn "*pushsi2" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] - "!TARGET_64BIT" - "push{l}\t%1" - [(set_attr "type" "push") - (set_attr "mode" "SI")]) - -;; For 64BIT abi we always round up to 8 bytes. 
-(define_insn "*pushsi2_rex64" - [(set (match_operand:SI 0 "push_operand" "=X") - (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))] - "TARGET_64BIT" - "push{q}\t%q1" - [(set_attr "type" "push") - (set_attr "mode" "SI")]) - -(define_insn "*pushsi2_prologue" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "general_no_elim_operand" "ri*m")) - (clobber (mem:BLK (scratch)))] - "!TARGET_64BIT" - "push{l}\t%1" - [(set_attr "type" "push") - (set_attr "mode" "SI")]) - -(define_insn "*popsi1_epilogue" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) (const_int 4))) - (clobber (mem:BLK (scratch)))] - "!TARGET_64BIT" - "pop{l}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "SI")]) - -(define_insn "popsi1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) (const_int 4)))] - "!TARGET_64BIT" - "pop{l}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "SI")]) - -(define_insn "*movsi_xor" - [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "const0_operand" "")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed" - "xor{l}\t%0, %0" - [(set_attr "type" "alu1") - (set_attr "mode" "SI") - (set_attr "length_immediate" "0")]) - -(define_insn "*movsi_or" - [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "immediate_operand" "i")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && operands[1] == constm1_rtx" -{ - operands[1] = constm1_rtx; - return "or{l}\t{%1, %0|%0, %1}"; -} - [(set_attr "type" "alu1") - (set_attr "mode" "SI") - (set_attr "length_immediate" "1")]) - -(define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") - (match_operand:SI 1 "general_operand" - "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] - "!(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - 
switch (get_attr_type (insn)) - { - case TYPE_SSELOG1: - if (get_attr_mode (insn) == MODE_TI) - return "%vpxor\t%0, %d0"; - return "%vxorps\t%0, %d0"; - - case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_TI: - return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - case MODE_SI: - return "%vmovd\t{%1, %0|%0, %1}"; - case MODE_SF: - return "%vmovss\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } - - case TYPE_MMX: - return "pxor\t%0, %0"; - - case TYPE_MMXMOV: - if (get_attr_mode (insn) == MODE_DI) - return "movq\t{%1, %0|%0, %1}"; - return "movd\t{%1, %0|%0, %1}"; - - case TYPE_LEA: - return "lea{l}\t{%1, %0|%0, %1}"; - - default: - gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); - return "mov{l}\t{%1, %0|%0, %1}"; - } -} - [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "mmx") - (eq_attr "alternative" "3,4,5") - (const_string "mmxmov") - (eq_attr "alternative" "6") - (const_string "sselog1") - (eq_attr "alternative" "7,8,9,10,11") - (const_string "ssemov") - (match_operand:DI 1 "pic_32bit_operand" "") - (const_string "lea") - ] - (const_string "imov"))) - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") - (const_string "orig") - (const_string "maybe_vex"))) - (set (attr "prefix_data16") - (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) - (const_string "1") - (const_string "*"))) - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (const_string "DI") - (eq_attr "alternative" "6,7") - (if_then_else - (eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (and (eq_attr "alternative" "8,9,10,11") - (eq (symbol_ref "TARGET_SSE2") (const_int 0))) - (const_string "SF") - ] - (const_string "SI")))]) - -;; Stores and loads of ax to arbitrary constant address. 
-;; We fake an second form of instruction to force reload to load address -;; into register when rax is not available -(define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT && ix86_check_movabs (insn, 0)" - "@ - movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0,*") - (set_attr "memory" "store") - (set_attr "mode" "SI")]) - -(define_insn "*movabssi_2_rex64" - [(set (match_operand:SI 0 "register_operand" "=a,r") - (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT && ix86_check_movabs (insn, 1)" - "@ - movabs{l}\t{%P1, %0|%0, %P1} - mov{l}\t{%a1, %0|%0, %a1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0") - (set_attr "memory" "load") - (set_attr "mode" "SI")]) - -(define_insn "*swapsi" - [(set (match_operand:SI 0 "register_operand" "+r") - (match_operand:SI 1 "register_operand" "+r")) - (set (match_dup 1) - (match_dup 0))] - "" - "xchg{l}\t%1, %0" - [(set_attr "type" "imov") - (set_attr "mode" "SI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) - -(define_expand "movhi" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (match_operand:HI 1 "general_operand" ""))] - "" - "ix86_expand_move (HImode, operands); DONE;") - -(define_insn "*pushhi2" - [(set (match_operand:HI 0 "push_operand" "=X") - (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] - "!TARGET_64BIT" - "push{l}\t%k1" - [(set_attr "type" "push") - (set_attr "mode" "SI")]) - -;; For 64BIT abi we always round up to 8 bytes. 
-(define_insn "*pushhi2_rex64" - [(set (match_operand:HI 0 "push_operand" "=X") - (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] - "TARGET_64BIT" - "push{q}\t%q1" - [(set_attr "type" "push") - (set_attr "mode" "DI")]) - -(define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") - (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - /* movzwl is faster than movw on p2 due to partial word stalls, - though not as fast as an aligned movl. */ - return "movz{wl|x}\t{%1, %k0|%k0, %1}"; - default: - if (get_attr_mode (insn) == MODE_SI) - return "mov{l}\t{%k1, %k0|%k0, %k1}"; - else - return "mov{w}\t{%1, %0|%0, %1}"; - } -} - [(set (attr "type") - (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) - (const_string "imov") - (and (eq_attr "alternative" "0") - (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") - (const_int 0)) - (eq (symbol_ref "TARGET_HIMODE_MATH") - (const_int 0)))) - (const_string "imov") - (and (eq_attr "alternative" "1,2") - (match_operand:HI 1 "aligned_operand" "")) - (const_string "imov") - (and (ne (symbol_ref "TARGET_MOVX") - (const_int 0)) - (eq_attr "alternative" "0,2")) - (const_string "imovx") - ] - (const_string "imov"))) - (set (attr "mode") - (cond [(eq_attr "type" "imovx") - (const_string "SI") - (and (eq_attr "alternative" "1,2") - (match_operand:HI 1 "aligned_operand" "")) - (const_string "SI") - (and (eq_attr "alternative" "0") - (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") - (const_int 0)) - (eq (symbol_ref "TARGET_HIMODE_MATH") - (const_int 0)))) - (const_string "SI") - ] - (const_string "HI")))]) - -;; Stores and loads of ax to arbitrary constant address. 
-;; We fake an second form of instruction to force reload to load address -;; into register when rax is not available -(define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT && ix86_check_movabs (insn, 0)" - "@ - movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0,*") - (set_attr "memory" "store") - (set_attr "mode" "HI")]) - -(define_insn "*movabshi_2_rex64" - [(set (match_operand:HI 0 "register_operand" "=a,r") - (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT && ix86_check_movabs (insn, 1)" - "@ - movabs{w}\t{%P1, %0|%0, %P1} - mov{w}\t{%a1, %0|%0, %a1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0") - (set_attr "memory" "load") - (set_attr "mode" "HI")]) - -(define_insn "*swaphi_1" - [(set (match_operand:HI 0 "register_operand" "+r") - (match_operand:HI 1 "register_operand" "+r")) - (set (match_dup 1) - (match_dup 0))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "xchg{l}\t%k1, %k0" - [(set_attr "type" "imov") - (set_attr "mode" "SI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) - -;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 -(define_insn "*swaphi_2" - [(set (match_operand:HI 0 "register_operand" "+r") - (match_operand:HI 1 "register_operand" "+r")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{w}\t%1, %0" - [(set_attr "type" "imov") - (set_attr "mode" "HI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) - -(define_expand "movstricthi" - [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "")) - (match_operand:HI 1 
"general_operand" ""))] - "" -{ - if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) - FAIL; - /* Don't generate memory->memory moves, go through a register */ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (HImode, operands[1]); -}) - -(define_insn "*movstricthi_1" - [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r")) - (match_operand:HI 1 "general_operand" "rn,m"))] - "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "mov{w}\t{%1, %0|%0, %1}" - [(set_attr "type" "imov") - (set_attr "mode" "HI")]) - -(define_insn "*movstricthi_xor" - [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) - (match_operand:HI 1 "const0_operand" "")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed" - "xor{w}\t%0, %0" - [(set_attr "type" "alu1") - (set_attr "mode" "HI") - (set_attr "length_immediate" "0")]) - -(define_expand "movqi" - [(set (match_operand:QI 0 "nonimmediate_operand" "") - (match_operand:QI 1 "general_operand" ""))] - "" - "ix86_expand_move (QImode, operands); DONE;") - -;; emit_push_insn when it calls move_by_pieces requires an insn to -;; "push a byte". But actually we use pushl, which has the effect -;; of rounding the amount pushed up to a word. - -(define_insn "*pushqi2" - [(set (match_operand:QI 0 "push_operand" "=X") - (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))] - "!TARGET_64BIT" - "push{l}\t%k1" - [(set_attr "type" "push") - (set_attr "mode" "SI")]) - -;; For 64BIT abi we always round up to 8 bytes. -(define_insn "*pushqi2_rex64" - [(set (match_operand:QI 0 "push_operand" "=X") - (match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))] - "TARGET_64BIT" - "push{q}\t%q1" - [(set_attr "type" "push") - (set_attr "mode" "DI")]) - -;; Situation is quite tricky about when to choose full sized (SImode) move -;; over QImode moves. 
For Q_REG -> Q_REG move we use full size only for -;; partial register dependency machines (such as AMD Athlon), where QImode -;; moves issue extra dependency and for partial register stalls machines -;; that don't use QImode patterns (and QImode move cause stall on the next -;; instruction). -;; -;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial -;; register stall machines with, where we use QImode instructions, since -;; partial register stall can be caused there. Then we use movzx. -(define_insn "*movqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") - (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); - return "movz{bl|x}\t{%1, %k0|%k0, %1}"; - default: - if (get_attr_mode (insn) == MODE_SI) - return "mov{l}\t{%k1, %k0|%k0, %k1}"; - else - return "mov{b}\t{%1, %0|%0, %1}"; - } -} - [(set (attr "type") - (cond [(and (eq_attr "alternative" "5") - (not (match_operand:QI 1 "aligned_operand" ""))) - (const_string "imovx") - (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) - (const_string "imov") - (and (eq_attr "alternative" "3") - (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") - (const_int 0)) - (eq (symbol_ref "TARGET_QIMODE_MATH") - (const_int 0)))) - (const_string "imov") - (eq_attr "alternative" "3,5") - (const_string "imovx") - (and (ne (symbol_ref "TARGET_MOVX") - (const_int 0)) - (eq_attr "alternative" "2")) - (const_string "imovx") - ] - (const_string "imov"))) - (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,5") - (const_string "SI") - (eq_attr "alternative" "6") - (const_string "QI") - (eq_attr "type" "imovx") - (const_string "SI") - (and (eq_attr "type" "imov") - (and (eq_attr "alternative" "0,1") - (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") - (const_int 0)) - (and (eq 
(symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") - (const_int 0)))))) - (const_string "SI") - ;; Avoid partial register stalls when not using QImode arithmetic - (and (eq_attr "type" "imov") - (and (eq_attr "alternative" "0,1") - (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") - (const_int 0)) - (eq (symbol_ref "TARGET_QIMODE_MATH") - (const_int 0))))) - (const_string "SI") - ] - (const_string "QI")))]) - -(define_insn "*swapqi_1" - [(set (match_operand:QI 0 "register_operand" "+r") - (match_operand:QI 1 "register_operand" "+r")) - (set (match_dup 1) - (match_dup 0))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "xchg{l}\t%k1, %k0" - [(set_attr "type" "imov") - (set_attr "mode" "SI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) - -;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 -(define_insn "*swapqi_2" - [(set (match_operand:QI 0 "register_operand" "+q") - (match_operand:QI 1 "register_operand" "+q")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{b}\t%1, %0" - [(set_attr "type" "imov") - (set_attr "mode" "QI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) - -(define_expand "movstrictqi" - [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) - (match_operand:QI 1 "general_operand" ""))] - "" -{ - if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) - FAIL; - /* Don't generate memory->memory moves, go through a register. */ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (QImode, operands[1]); -}) - -(define_insn "*movstrictqi_1" - [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) - (match_operand:QI 1 "general_operand" "*qn,m"))] - "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "mov{b}\t{%1, %0|%0, %1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - -(define_insn "*movstrictqi_xor" - [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) - (match_operand:QI 1 "const0_operand" "")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed" - "xor{b}\t%0, %0" - [(set_attr "type" "alu1") - (set_attr "mode" "QI") - (set_attr "length_immediate" "0")]) - -(define_insn "*movsi_extv_1" - [(set (match_operand:SI 0 "register_operand" "=R") - (sign_extract:SI (match_operand 1 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))] - "" - "movs{bl|x}\t{%h1, %0|%0, %h1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - -(define_insn "*movhi_extv_1" - [(set (match_operand:HI 0 "register_operand" "=R") - (sign_extract:HI (match_operand 1 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))] - "" - "movs{bl|x}\t{%h1, %k0|%k0, %h1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - -(define_insn "*movqi_extv_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r") - (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") - (const_int 8) - (const_int 8)))] - "!TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; - default: - return "mov{b}\t{%h1, %0|%0, %h1}"; - } -} - [(set (attr "type") - (if_then_else (and (match_operand:QI 0 "register_operand" "") - (ior (not (match_operand:QI 0 "q_regs_operand" "")) - (ne (symbol_ref "TARGET_MOVX") - (const_int 0)))) - (const_string "imovx") - (const_string "imov"))) - (set (attr "mode") - (if_then_else (eq_attr "type" "imovx") - (const_string "SI") - (const_string "QI")))]) - -(define_insn "*movqi_extv_1_rex64" - [(set (match_operand:QI 0 "register_operand" "=Q,?R") - (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") - (const_int 8) - (const_int 8)))] - 
"TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; - default: - return "mov{b}\t{%h1, %0|%0, %h1}"; - } -} - [(set (attr "type") - (if_then_else (and (match_operand:QI 0 "register_operand" "") - (ior (not (match_operand:QI 0 "q_regs_operand" "")) - (ne (symbol_ref "TARGET_MOVX") - (const_int 0)))) - (const_string "imovx") - (const_string "imov"))) - (set (attr "mode") - (if_then_else (eq_attr "type" "imovx") - (const_string "SI") - (const_string "QI")))]) - -;; Stores and loads of ax to arbitrary constant address. -;; We fake an second form of instruction to force reload to load address -;; into register when rax is not available -(define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT && ix86_check_movabs (insn, 0)" - "@ - movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0,*") - (set_attr "memory" "store") - (set_attr "mode" "QI")]) - -(define_insn "*movabsqi_2_rex64" - [(set (match_operand:QI 0 "register_operand" "=a,r") - (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT && ix86_check_movabs (insn, 1)" - "@ - movabs{b}\t{%P1, %0|%0, %P1} - mov{b}\t{%a1, %0|%0, %a1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0") - (set_attr "memory" "load") - (set_attr "mode" "QI")]) - -(define_insn "*movdi_extzv_1" - [(set (match_operand:DI 0 "register_operand" "=R") - (zero_extract:DI (match_operand 1 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))] - "TARGET_64BIT" - "movz{bl|x}\t{%h1, %k0|%k0, %h1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - -(define_insn "*movsi_extzv_1" - [(set (match_operand:SI 0 "register_operand" "=R") - 
(zero_extract:SI (match_operand 1 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))] - "" - "movz{bl|x}\t{%h1, %0|%0, %h1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - -(define_insn "*movqi_extzv_2" - [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") - (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0))] - "!TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; - default: - return "mov{b}\t{%h1, %0|%0, %h1}"; - } -} - [(set (attr "type") - (if_then_else (and (match_operand:QI 0 "register_operand" "") - (ior (not (match_operand:QI 0 "q_regs_operand" "")) - (ne (symbol_ref "TARGET_MOVX") - (const_int 0)))) - (const_string "imovx") - (const_string "imov"))) - (set (attr "mode") - (if_then_else (eq_attr "type" "imovx") - (const_string "SI") - (const_string "QI")))]) - -(define_insn "*movqi_extzv_2_rex64" - [(set (match_operand:QI 0 "register_operand" "=Q,?R") - (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0))] - "TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOVX: - return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; - default: - return "mov{b}\t{%h1, %0|%0, %h1}"; - } -} - [(set (attr "type") - (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) - (ne (symbol_ref "TARGET_MOVX") - (const_int 0))) - (const_string "imovx") - (const_string "imov"))) - (set (attr "mode") - (if_then_else (eq_attr "type" "imovx") - (const_string "SI") - (const_string "QI")))]) - -(define_insn "movsi_insv_1" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (match_operand:SI 1 "general_operand" "Qmn"))] - "!TARGET_64BIT" - "mov{b}\t{%b1, %h0|%h0, %b1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - -(define_insn "*movsi_insv_1_rex64" - [(set (zero_extract:SI (match_operand 0 
"ext_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (match_operand:SI 1 "nonmemory_operand" "Qn"))] - "TARGET_64BIT" - "mov{b}\t{%b1, %h0|%h0, %b1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - -(define_insn "movdi_insv_1_rex64" - [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (match_operand:DI 1 "nonmemory_operand" "Qn"))] - "TARGET_64BIT" - "mov{b}\t{%b1, %h0|%h0, %b1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - -(define_insn "*movqi_insv_2" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") - (const_int 8)))] - "" - "mov{b}\t{%h1, %h0|%h0, %h1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - -(define_expand "movdi" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:DI 1 "general_operand" ""))] - "" - "ix86_expand_move (DImode, operands); DONE;") - -(define_insn "*pushdi" - [(set (match_operand:DI 0 "push_operand" "=<") - (match_operand:DI 1 "general_no_elim_operand" "riF*m"))] - "!TARGET_64BIT" +;; Push/pop instructions. 
+ +(define_insn "*push<mode>2" + [(set (match_operand:DWI 0 "push_operand" "=<") + (match_operand:DWI 1 "general_no_elim_operand" "riF*m"))] + "" "#") +(define_split + [(set (match_operand:TI 0 "push_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + (define_insn "*pushdi2_rex64" [(set (match_operand:DI 0 "push_operand" "=<,!<") (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] @@ -2368,8 +1655,7 @@ "TARGET_64BIT && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. @@ -2382,7 +1668,7 @@ [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { - split_di (&operands[1], 1, &operands[2], &operands[3]); + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, @@ -2399,115 +1685,280 @@ [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { - split_di (&operands[1], 1, &operands[2], &operands[3]); + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, GEN_INT (4))); }) -(define_insn "*pushdi2_prologue_rex64" - [(set (match_operand:DI 0 "push_operand" "=<") - (match_operand:DI 1 "general_no_elim_operand" "re*m")) - (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" - "push{q}\t%1" - [(set_attr "type" "push") - (set_attr "mode" "DI")]) - -(define_insn "*popdi1_epilogue_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI SP_REG))) - (set 
(reg:DI SP_REG) - (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" - "pop{q}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "DI")]) - -(define_insn "popdi1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) - (plus:DI (reg:DI SP_REG) (const_int 8)))] - "TARGET_64BIT" - "pop{q}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "DI")]) - -(define_insn "*movdi_xor_rex64" - [(set (match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "const0_operand" "")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT - && reload_completed" - "xor{l}\t%k0, %k0"; - [(set_attr "type" "alu1") - (set_attr "mode" "SI") - (set_attr "length_immediate" "0")]) - -(define_insn "*movdi_or_rex64" - [(set (match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "const_int_operand" "i")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT - && reload_completed - && operands[1] == constm1_rtx" -{ - operands[1] = constm1_rtx; - return "or{q}\t{%1, %0|%0, %1}"; -} - [(set_attr "type" "alu1") - (set_attr "mode" "DI") - (set_attr "length_immediate" "1")]) - -(define_insn "*movdi_2" - [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x") - (match_operand:DI 1 "general_operand" - "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))] - "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "@ - # - # - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - %vpxor\t%0, %d0 - %vmovq\t{%1, %0|%0, %1} - %vmovdqa\t{%1, %0|%0, %1} - %vmovq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movlps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "5,6,7,8") - (const_string "vex") - (const_string "orig"))) - (set_attr "mode" 
"DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) - (define_split [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "general_operand" ""))] "!TARGET_64BIT && reload_completed - && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") -;; %%% This multiword shite has got to go. -(define_split - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:DI 1 "general_operand" ""))] - "!TARGET_64BIT && reload_completed - && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0])) - && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" +(define_insn "*pushsi2" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; emit_push_insn when it calls move_by_pieces requires an insn to +;; "push a byte/word". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word. + +;; For TARGET_64BIT we always round up to 8 bytes. 
+(define_insn "*push<mode>2_rex64" + [(set (match_operand:SWI124 0 "push_operand" "=X") + (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*push<mode>2" + [(set (match_operand:SWI12 0 "push_operand" "=X") + (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*push<mode>2_prologue" + [(set (match_operand:P 0 "push_operand" "=<") + (match_operand:P 1 "general_no_elim_operand" "r<i>*m")) + (clobber (mem:BLK (scratch)))] + "" + "push{<imodesuffix>}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pop<mode>1" + [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") + (match_operand:P 1 "pop_operand" ">"))] + "" + "pop{<imodesuffix>}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pop<mode>1_epilogue" + [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") + (match_operand:P 1 "pop_operand" ">")) + (clobber (mem:BLK (scratch)))] + "" + "pop{<imodesuffix>}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "<MODE>")]) + +;; Move instructions. + +(define_expand "movoi" + [(set (match_operand:OI 0 "nonimmediate_operand" "") + (match_operand:OI 1 "general_operand" ""))] + "TARGET_AVX" + "ix86_expand_move (OImode, operands); DONE;") + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_64BIT || TARGET_SSE" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else if (push_operand (operands[0], TImode)) + ix86_expand_push (TImode, operands[1]); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) + +;; This expands to what emit_move_complex would generate if we didn't +;; have a movti pattern. 
Having this avoids problems with reload on +;; 32-bit targets when SSE is present, but doesn't seem to be harmful +;; to have around all the time. +(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand" "") + (match_operand:CDI 1 "general_operand" ""))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +(define_expand "mov<mode>" + [(set (match_operand:SWI1248x 0 "nonimmediate_operand" "") + (match_operand:SWI1248x 1 "general_operand" ""))] + "" + "ix86_expand_move (<MODE>mode, operands); DONE;") + +(define_insn "*mov<mode>_xor" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{l}\t%k0, %k0" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*mov<mode>_or" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const_int_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx" + "or{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "1")]) + +(define_insn "*movoi_internal_avx" + [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:OI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return "vxorps\t%0, %0, %0"; + case 1: + case 2: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + return "vmovdqu\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_insn "*movti_internal_rex64" + [(set 
(match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed + && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])" [(const_int 0)] "ix86_split_long_move (operands); DONE;") -(define_insn "*movdi_1_rex64" +(define_insn "*movti_internal_sse" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P 
(operands[1]))" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 1: + case 2: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) + +(define_insn "*movdi_internal_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") (match_operand:DI 1 "general_operand" @@ -2599,37 +2050,6 @@ (const_string "orig"))) (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")]) -;; Stores and loads of ax to arbitrary constant address. 
-;; We fake an second form of instruction to force reload to load address -;; into register when rax is not available -(define_insn "*movabsdi_1_rex64" - [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) - (match_operand:DI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT && ix86_check_movabs (insn, 0)" - "@ - movabs{q}\t{%1, %P0|%P0, %1} - mov{q}\t{%1, %a0|%a0, %1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0,*") - (set_attr "memory" "store") - (set_attr "mode" "DI")]) - -(define_insn "*movabsdi_2_rex64" - [(set (match_operand:DI 0 "register_operand" "=a,r") - (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT && ix86_check_movabs (insn, 1)" - "@ - movabs{q}\t{%P1, %0|%0, %P1} - mov{q}\t{%a1, %0|%0, %a1}" - [(set_attr "type" "imov") - (set_attr "modrm" "0,*") - (set_attr "length_address" "8,0") - (set_attr "length_immediate" "0") - (set_attr "memory" "load") - (set_attr "mode" "DI")]) - ;; Convert impossible stores of immediate to existing instructions. ;; First try to get scratch register and go through it. In case this ;; fails, move by 32bit parts. @@ -2640,8 +2060,7 @@ "TARGET_64BIT && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. 
@@ -2653,7 +2072,7 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (&operands[0], 2, &operands[2], &operands[4]);") + "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);") (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -2664,201 +2083,654 @@ && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (&operands[0], 2, &operands[2], &operands[4]);") - -(define_insn "*swapdi_rex64" - [(set (match_operand:DI 0 "register_operand" "+r") - (match_operand:DI 1 "register_operand" "+r")) + "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);") + +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + # + # + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + %vpxor\t%0, %d0 + %vmovq\t{%1, %0|%0, %1} + %vmovdqa\t{%1, %0|%0, %1} + %vmovq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8") + (const_string "vex") + (const_string "orig"))) + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) + && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movsi_internal" + 
[(set (match_operand:SI 0 "nonimmediate_operand" + "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + (match_operand:SI 1 "general_operand" + "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + return "%vxorps\t%0, %d0"; + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "%vmovss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{l}\t{%a1, %0|%0, %a1}"; + + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "mmx") + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "ssemov") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") 
(const_int 0))) + (const_string "SF") + ] + (const_string "SI")))]) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "imov") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "SI") + ] + (const_string "HI")))]) + +;; Situation is quite tricky about when to choose full sized (SImode) move +;; over QImode moves. For Q_REG -> Q_REG move we use full size only for +;; partial register dependency machines (such as AMD Athlon), where QImode +;; moves issue extra dependency and for partial register stalls machines +;; that don't use QImode patterns (and QImode move cause stall on the next +;; instruction). 
+;; +;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial +;; register stall machines with, where we use QImode instructions, since +;; partial register stall can be caused there. Then we use movzx. +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "3") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" 
"0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0))))) + (const_string "SI") + ] + (const_string "QI")))]) + +;; Stores and loads of ax to arbitrary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabs<mode>_1" + [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SWI1248x 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{<imodesuffix>}\t{%1, %P0|%P0, %1} + mov{<imodesuffix>}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "<MODE>")]) + +(define_insn "*movabs<mode>_2" + [(set (match_operand:SWI1248x 0 "register_operand" "=a,r") + (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{<imodesuffix>}\t{%P1, %0|%0, %P1} + mov{<imodesuffix>}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "<MODE>")]) + +(define_insn "*swap<mode>" + [(set (match_operand:SWI48 0 "register_operand" "+r") + (match_operand:SWI48 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "TARGET_64BIT" - "xchg{q}\t%1, %0" + "" + "xchg{<imodesuffix>}\t%1, %0" [(set_attr "type" "imov") - (set_attr "mode" "DI") + (set_attr "mode" "<MODE>") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) - -(define_expand "movoi" - [(set (match_operand:OI 0 "nonimmediate_operand" "") - (match_operand:OI 1 "general_operand" ""))] - "TARGET_AVX" - "ix86_expand_move (OImode, operands); DONE;") - -(define_insn 
"*movoi_internal" - [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:OI 1 "vector_move_operand" "C,xm,x"))] - "TARGET_AVX - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - return "vxorps\t%0, %0, %0"; - case 1: - case 2: - if (misaligned_operand (operands[0], OImode) - || misaligned_operand (operands[1], OImode)) - return "vmovdqu\t{%1, %0|%0, %1}"; - else - return "vmovdqa\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "sselog1,ssemov,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_expand "movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "nonimmediate_operand" ""))] - "TARGET_SSE || TARGET_64BIT" -{ - if (TARGET_64BIT) - ix86_expand_move (TImode, operands); - else if (push_operand (operands[0], TImode)) - ix86_expand_push (TImode, operands[1]); - else - ix86_expand_vector_move (TImode, operands); - DONE; -}) - -(define_insn "*movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*swap<mode>_1" + [(set (match_operand:SWI12 0 "register_operand" "+r") + (match_operand:SWI12 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "double")]) + +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL +;; is disabled for AMDFAM10 +(define_insn "*swap<mode>_2" + [(set (match_operand:SWI12 0 "register_operand" "+<r>") + (match_operand:SWI12 1 "register_operand" "+<r>")) + (set (match_dup 1) + (match_dup 0))] + 
"TARGET_PARTIAL_REG_STALL" + "xchg{<imodesuffix>}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstrict<mode>" + [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand" "")) + (match_operand:SWI12 1 "general_operand" ""))] + "" +{ + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; + /* Don't generate memory->memory moves, go through a register */ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +(define_insn "*movstrict<mode>_1" + [(set (strict_low_part + (match_operand:SWI12 0 "nonimmediate_operand" "+<r>m,<r>")) + (match_operand:SWI12 1 "general_operand" "<r>n,m"))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - case 1: - case 2: - /* TDmode values are passed as TImode on the stack. Moving them - to stack may result in unaligned memory access. 
*/ - if (misaligned_operand (operands[0], TImode) - || misaligned_operand (operands[1], TImode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } - default: - gcc_unreachable (); - } -} - [(set_attr "type" "sselog1,ssemov,ssemov") - (set_attr "prefix" "maybe_vex") + "mov{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>")]) + +(define_insn "*movstrict<mode>_xor" + [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>")) + (match_operand:SWI12 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{<imodesuffix>}\t%0, %0" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "0")]) + +(define_insn "*mov<mode>_extv_1" + [(set (match_operand:SWI24 0 "register_operand" "=R") + (sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 
0 "nonimmediate_operand" "=Qm,?r") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) (set (attr "mode") - (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))) - (const_string "V4SF") - (and (eq_attr "alternative" "2") - (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0))) - (const_string "V4SF")] - (const_string "TI")))]) - -(define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm") - (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - case 3: - case 4: - /* TDmode values are passed as TImode on the stack. Moving them - to stack may result in unaligned memory access. 
*/ - if (misaligned_operand (operands[0], TImode) - || misaligned_operand (operands[1], TImode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } - default: - gcc_unreachable (); - } -} - [(set_attr "type" "*,*,sselog1,ssemov,ssemov") - (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*mov<mode>_extzv_1" + [(set (match_operand:SWI48 0 "register_operand" "=R") + (zero_extract:SWI48 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + 
(if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_split - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_expand "mov<mode>_insv_1" + [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand:SWI48 1 "nonmemory_operand" ""))]) + +(define_insn "*mov<mode>_insv_1_rex64" + [(set (zero_extract:SWI48x (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SWI48x 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movsi_insv_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "general_operand" "Qmn"))] + "!TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" 
"imov") + (set_attr "mode" "QI")]) + +;; Floating point push instructions. + +(define_insn "*pushtf" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] + "TARGET_SSE2" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "sse,*,*") + (set_attr "mode" "TF,SI,SI")]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "sse_reg_operand" ""))] + "TARGET_SSE2 && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (mem:TF (reg:P SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "TARGET_SSE2 && reload_completed && !SSE_REG_P (operands[1])" [(const_int 0)] "ix86_split_long_move (operands); DONE;") -;; This expands to what emit_move_complex would generate if we didn't -;; have a movti pattern. Having this avoids problems with reload on -;; 32-bit targets when SSE is present, but doesn't seem to be harmful -;; to have around all the time. -(define_expand "movcdi" - [(set (match_operand:CDI 0 "nonimmediate_operand" "") - (match_operand:CDI 1 "general_operand" ""))] - "" -{ - if (push_operand (operands[0], CDImode)) - emit_move_complex_push (CDImode, operands[0], operands[1]); - else - emit_move_complex_parts (operands[0], operands[1]); - DONE; -}) - -(define_expand "movsf" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (match_operand:SF 1 "general_operand" ""))] - "" - "ix86_expand_move (SFmode, operands); DONE;") +(define_insn "*pushxf" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "optimize_function_for_speed_p (cfun)" +{ + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set_attr "mode" "XF,SI")]) + +;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushxf using integer instructions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references (assuming that any given constant is pushed +;; only once, but this ought to be handled elsewhere). + +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_function_for_size_p (cfun)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "XF,SI,SI")]) + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushdf" + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "DF,SI,DF")]) + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. Allow this +;; pattern for optimize_size too. 
+ +(define_insn "*pushdf_nointeger" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] + "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") + (set_attr "mode" "DF,SI,SI,DF")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,DI,SF")]) (define_insn "*pushsf" [(set (match_operand:SF 0 "push_operand" "=<,<,<") @@ -2873,19 +2745,6 @@ (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,SI,SF")]) -(define_insn "*pushsf_rex64" - [(set (match_operand:SF 0 "push_operand" "=X,X,X") - (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] - "TARGET_64BIT" -{ - /* Anything else should be already split before reg-stack. 
*/ - gcc_assert (which_alternative == 1); - return "push{q}\t%q1"; -} - [(set_attr "type" "multi,push,multi") - (set_attr "unit" "i387,*,*") - (set_attr "mode" "SF,DI,SF")]) - (define_split [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" ""))] @@ -2899,29 +2758,89 @@ (define_split [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "any_fp_register_operand" ""))] - "!TARGET_64BIT" - [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) - (set (mem:SF (reg:SI SP_REG)) (match_dup 1))]) - -(define_split - [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "any_fp_register_operand" ""))] - "TARGET_64BIT" - [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) - (set (mem:SF (reg:DI SP_REG)) (match_dup 1))]) - -(define_insn "*movsf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" - "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") - (match_operand:SF 1 "general_operand" - "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1])) + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:SF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (-GET_MODE_SIZE (<MODE>mode));") + +;; Floating point move instructions. 
+ +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_expand "mov<mode>" + [(set (match_operand:X87MODEF 0 "nonimmediate_operand" "") + (match_operand:X87MODEF 1 "general_operand" ""))] + "" + "ix86_expand_move (<MODE>mode, operands); DONE;") + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movxf_internal" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + 
"optimize_function_for_speed_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], SFmode))" + || memory_operand (operands[0], XFmode))" { switch (which_alternative) { @@ -2932,169 +2851,26 @@ case 2: return standard_80387_constant_opcode (operands[1]); - case 3: - case 4: - return "mov{l}\t{%1, %0|%0, %1}"; - case 5: - if (get_attr_mode (insn) == MODE_TI) - return "%vpxor\t%0, %d0"; - else - return "%vxorps\t%0, %d0"; - case 6: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovss\t{%1, %d0|%d0, %1}"; - case 7: - if (TARGET_AVX) - return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}" - : "vmovss\t{%1, %0|%0, %1}"; - else - return "movss\t{%1, %0|%0, %1}"; - case 8: - return "%vmovss\t{%1, %0|%0, %1}"; - - case 9: case 10: case 14: case 15: - return "movd\t{%1, %0|%0, %1}"; - case 12: case 13: - return "%vmovd\t{%1, %0|%0, %1}"; - - case 11: - return "movq\t{%1, %0|%0, %1}"; + case 3: case 4: + return "#"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") - (const_string "maybe_vex") - (const_string "orig"))) - (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10") - (const_string "SI") - (eq_attr "alternative" "5") - (if_then_else - (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE2") - (const_int 0))) - (eq (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0))) - (const_string "TI") - (const_string "V4SF")) - /* For architectures resolving dependencies on - whole 
SSE registers use APS move to break dependency - chains, otherwise use short move to avoid extra work. - - Do the same for architectures resolving dependencies on - the parts. While in DF mode it is better to always handle - just register parts, the SF mode is different due to lack - of instructions to load just part of the register. It is - better to maintain the whole registers in single format - to avoid problems on using packed logical operations. */ - (eq_attr "alternative" "6") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") - (const_int 0))) - (const_string "V4SF") - (const_string "SF")) - (eq_attr "alternative" "11") - (const_string "DI")] - (const_string "SF")))]) - -(define_insn "*swapsf" - [(set (match_operand:SF 0 "fp_register_operand" "+f") - (match_operand:SF 1 "fp_register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "reload_completed || TARGET_80387" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "SF")]) - -(define_expand "movdf" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "" - "ix86_expand_move (DFmode, operands); DONE;") - -;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer instructions is 2+2*memory operand size -;; On the average, pushdf using integers can be still shorter. Allow this -;; pattern for optimize_size too. - -(define_insn "*pushdf_nointeger" - [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] - "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" -{ - /* This insn should be already split before reg-stack. 
*/ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*,*") - (set_attr "mode" "DF,SI,SI,DF")]) - -(define_insn "*pushdf_integer" - [(set (match_operand:DF 0 "push_operand" "=<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] - "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*") - (set_attr "mode" "DF,SI,DF")]) - -;; %%% Kill this when call knows how to work this out. -(define_split - [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "any_fp_register_operand" ""))] - "reload_completed" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) - (set (mem:DF (reg:P SP_REG)) (match_dup 1))] - "") - -(define_split - [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "reload_completed" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -;; Moving is usually shorter when only FP registers are used. This separate -;; movdf pattern avoids the use of integer registers for FP operations -;; when optimizing for size. 
- -(define_insn "*movdf_nointeger" - [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") - (match_operand:DF 1 "general_operand" - "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && ((optimize_function_for_size_p (cfun) - || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_internal_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "optimize_function_for_size_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!(TARGET_SSE2 && TARGET_SSE_MATH) - && optimize_function_for_size_p (cfun) - && !memory_operand (operands[0], DFmode) - && standard_80387_constant_p (operands[1])) + || standard_80387_constant_p (operands[1]) || GET_CODE (operands[1]) != CONST_DOUBLE - || ((optimize_function_for_size_p (cfun) - || !TARGET_MEMORY_MISMATCH_STALL - || reload_in_progress || reload_completed) - && memory_operand (operands[0], DFmode)))" + || memory_operand (operands[0], XFmode))" { switch (which_alternative) { @@ -3105,145 +2881,30 @@ case 2: return standard_80387_constant_opcode (operands[1]); - case 3: - case 4: + case 3: case 4: return "#"; - case 5: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vxorps\t%0, %d0"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vxorpd\t%0, %d0"; - case MODE_TI: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - default: - gcc_unreachable (); - } - case 6: - case 7: - case 8: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, 
%1}"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovapd\t{%1, %0|%0, %1}"; - case MODE_TI: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_DI: - return "%vmovq\t{%1, %0|%0, %1}"; - case MODE_DF: - if (TARGET_AVX) - { - if (REG_P (operands[0]) && REG_P (operands[1])) - return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovsd\t{%1, %0|%0, %1}"; - } - else - return "movsd\t{%1, %0|%0, %1}"; - case MODE_V1DF: - if (TARGET_AVX) - { - if (REG_P (operands[0])) - return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovlpd\t{%1, %0|%0, %1}"; - } - else - return "movlpd\t{%1, %0|%0, %1}"; - case MODE_V2SF: - if (TARGET_AVX) - { - if (REG_P (operands[0])) - return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovlps\t{%1, %0|%0, %1}"; - } - else - return "movlps\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } - default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "0,1,2,3,4") - (const_string "orig") - (const_string "maybe_vex"))) - (set (attr "prefix_data16") - (if_then_else (eq_attr "mode" "V1DF") - (const_string "1") - (const_string "*"))) - (set (attr "mode") - (cond [(eq_attr "alternative" "0,1,2") - (const_string "DF") - (eq_attr "alternative" "3,4") - (const_string "SI") - - /* For SSE1, we have many fewer alternatives. */ - (eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (cond [(eq_attr "alternative" "5,6") - (const_string "V4SF") - ] - (const_string "V2SF")) - - /* xorps is one byte shorter. 
*/ - (eq_attr "alternative" "5") - (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (const_string "TI") - ] - (const_string "V2DF")) - - /* For architectures resolving dependencies on - whole SSE registers use APD move to break dependency - chains, otherwise use short move to avoid extra work. - - movaps encodes one byte shorter. */ - (eq_attr "alternative" "6") - (cond - [(ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (const_int 0)) - (const_string "V2DF") - ] - (const_string "DF")) - /* For architectures resolving dependencies on register - parts we may avoid extra work to zero out upper part - of register. */ - (eq_attr "alternative" "7") - (if_then_else - (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") - (const_int 0)) - (const_string "V1DF") - (const_string "DF")) - ] - (const_string "DF")))]) - -(define_insn "*movdf_integer_rex64" + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_split + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && FP_REG_P (SUBREG_REG (operands[0])))) + && ! 
(FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movdf_internal_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") (match_operand:DF 1 "general_operand" @@ -3393,7 +3054,7 @@ ] (const_string "DF")))]) -(define_insn "*movdf_integer" +(define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") (match_operand:DF 1 "general_operand" @@ -3529,6 +3190,178 @@ ] (const_string "DF")))]) +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size. + +(define_insn "*movdf_internal_nointeger" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ((optimize_function_for_size_p (cfun) + || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && !memory_operand (operands[0], DFmode) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || ((optimize_function_for_size_p (cfun) + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + if 
(TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vxorpd\t%0, %d0"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "V1DF") + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. 
*/ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. 
*/ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") (match_operand:DF 1 "general_operand" ""))] @@ -3543,113 +3376,18 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") -(define_insn "*swapdf" - [(set (match_operand:DF 0 "fp_register_operand" "+f") - (match_operand:DF 1 "fp_register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "reload_completed || TARGET_80387" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "DF")]) - -(define_expand "movxf" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (match_operand:XF 1 "general_operand" ""))] - "" - "ix86_expand_move (XFmode, operands); DONE;") - -;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer instructions is 3+3*memory operand size -;; Pushing using integer instructions is longer except for constants -;; and direct memory references. -;; (assuming that any given constant is pushed only once, but this ought to be -;; handled elsewhere). - -(define_insn "*pushxf_nointeger" - [(set (match_operand:XF 0 "push_operand" "=X,X,X") - (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] - "optimize_function_for_size_p (cfun)" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*") - (set_attr "mode" "XF,SI,SI")]) - -(define_insn "*pushxf_integer" - [(set (match_operand:XF 0 "push_operand" "=<,<") - (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] - "optimize_function_for_speed_p (cfun)" -{ - /* This insn should be already split before reg-stack. 
*/ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*") - (set_attr "mode" "XF,SI")]) - -(define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "general_operand" ""))] - "reload_completed - && (GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode) - && !ANY_FP_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_split - [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "any_fp_register_operand" ""))] - "" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) - (set (mem:XF (reg:P SP_REG)) (match_dup 1))] - "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") - -;; Do not use integer registers when optimizing for size -(define_insn "*movxf_nointeger" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "optimize_function_for_size_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + (match_operand:SF 1 "general_operand" + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed - || standard_80387_constant_p (operands[1]) + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" -{ - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: case 4: - return "#"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_insn 
"*movxf_integer" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] - "optimize_function_for_speed_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && (reload_in_progress || reload_completed - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" + || memory_operand (operands[0], SFmode))" { switch (which_alternative) { @@ -3660,112 +3398,79 @@ case 2: return standard_80387_constant_opcode (operands[1]); - case 3: case 4: - return "#"; + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + else + return "%vxorps\t%0, %d0"; + case 6: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovss\t{%1, %d0|%d0, %1}"; + case 7: + if (TARGET_AVX) + return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}" + : "vmovss\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 8: + return "%vmovss\t{%1, %0|%0, %1}"; + + case 9: case 10: case 14: case 15: + return "movd\t{%1, %0|%0, %1}"; + case 12: case 13: + return "%vmovd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - "TARGET_SSE2" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) - -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") - (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] - "TARGET_SSE2 - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - case 1: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - case 2: - if 
(get_attr_mode (insn) == MODE_V4SF) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - case 3: - case 4: - return "#"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "ssemov,ssemov,sselog1,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "0,2") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") (if_then_else - (ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "1") + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. 
*/ + (eq_attr "alternative" "6") (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") (const_int 0)) - (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") (const_int 0))) (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_insn "*pushtf_sse" - [(set (match_operand:TF 0 "push_operand" "=<,<,<") - (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] - "TARGET_SSE2" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "sse,*,*") - (set_attr "mode" "TF,SI,SI")]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "TARGET_SSE2 && reload_completed - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "any_fp_register_operand" ""))] - "TARGET_SSE2" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) - (set (mem:TF (reg:P SP_REG)) (match_dup 1))] - "") - -(define_split - [(set (match_operand 0 "nonimmediate_operand" "") - (match_operand 1 "general_operand" ""))] - "reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && GET_MODE (operands[0]) == XFmode - && ! (ANY_FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! 
(ANY_FP_REG_P (operands[1]) || - (GET_CODE (operands[1]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_split [(set (match_operand 0 "register_operand" "") @@ -3774,8 +3479,8 @@ && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == SFmode - || GET_MODE (operands[0]) == DFmode) + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) (match_dup 2))] { @@ -3806,8 +3511,8 @@ && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == SFmode - || GET_MODE (operands[0]) == DFmode) + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) (match_dup 2))] { @@ -3831,21 +3536,6 @@ FAIL; }) -(define_insn "swapxf" - [(set (match_operand:XF 0 "register_operand" "+f") - (match_operand:XF 1 "register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_80387" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "XF")]) - ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence (define_split [(set (match_operand:X87MODEF 0 "register_operand" "") @@ -3866,221 +3556,76 @@ operands[1] = CONST1_RTX (<MODE>mode); }) -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed - && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + 
(set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_insn "*swap<mode>" + [(set (match_operand:MODEF 0 "fp_register_operand" "+f") + (match_operand:MODEF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387 || reload_completed" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "<MODE>")]) ;; Zero extension instructions -(define_expand "zero_extendhisi2" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] - "" -{ - if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) - { - operands[1] = force_reg (HImode, operands[1]); - emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); - DONE; - } -}) - -(define_insn "zero_extendhisi2_and" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" - "#" - [(set_attr "type" "alu1") - (set_attr "mode" "SI")]) - -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND - && optimize_function_for_speed_p (cfun)" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_insn "*zero_extendhisi2_movzwl" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] - "!TARGET_ZERO_EXTEND_WITH_AND - || optimize_function_for_size_p (cfun)" - "movz{wl|x}\t{%1, %0|%0, %1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - 
-(define_expand "zero_extendqihi2" - [(parallel - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") - -(define_insn "*zero_extendqihi2_and" - [(set (match_operand:HI 0 "register_operand" "=r,?&q") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" - "#" - [(set_attr "type" "alu1") - (set_attr "mode" "HI")]) - -(define_insn "*zero_extendqihi2_movzbw_and" - [(set (match_operand:HI 0 "register_operand" "=r,r") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" - "#" - [(set_attr "type" "imovx,alu1") - (set_attr "mode" "HI")]) - -; zero extend to SImode here to avoid partial register stalls -(define_insn "*zero_extendqihi2_movzbl" - [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) - && reload_completed" - "movz{bl|x}\t{%1, %k0|%k0, %1}" - [(set_attr "type" "imovx") - (set_attr "mode" "SI")]) - -;; For the movzbw case strip only the clobber -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && (!TARGET_ZERO_EXTEND_WITH_AND - || optimize_function_for_size_p (cfun)) - && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]) - -;; When source and destination does not overlap, clear destination -;; first and then do the movb -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 
"nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ANY_QI_REG_P (operands[0]) - && (TARGET_ZERO_EXTEND_WITH_AND - && optimize_function_for_speed_p (cfun)) - && !reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (strict_low_part (match_dup 2)) (match_dup 1))] -{ - operands[2] = gen_lowpart (QImode, operands[0]); - ix86_expand_clear (operands[0]); -}) - -;; Rest is handled by single and. -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) == true_regnum (operands[1])" - [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_expand "zero_extendqisi2" - [(parallel - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") - -(define_insn "*zero_extendqisi2_and" - [(set (match_operand:SI 0 "register_operand" "=r,?&q") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" - "#" - [(set_attr "type" "alu1") - (set_attr "mode" "SI")]) - -(define_insn "*zero_extendqisi2_movzbl_and" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" - "#" - [(set_attr "type" "imovx,alu1") - (set_attr "mode" "SI")]) - -(define_insn "*zero_extendqisi2_movzbl" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) - && reload_completed" - "movz{bl|x}\t{%1, %0|%0, %1}" - [(set_attr "type" 
"imovx") - (set_attr "mode" "SI")]) - -;; For the movzbl case strip only the clobber -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) - && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" - [(set (match_dup 0) - (zero_extend:SI (match_dup 1)))]) - -;; When source and destination does not overlap, clear destination -;; first and then do the movb -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ANY_QI_REG_P (operands[0]) - && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])) - && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) - && !reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (strict_low_part (match_dup 2)) (match_dup 1))] -{ - operands[2] = gen_lowpart (QImode, operands[0]); - ix86_expand_clear (operands[0]); -}) - -;; Rest is handled by single and. -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) == true_regnum (operands[1])" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) - (clobber (reg:CC FLAGS_REG))])] - "") - -;; %%% Kill me once multi-word ops are sane. 
(define_expand "zero_extendsidi2" - [(set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { if (!TARGET_64BIT) { - emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + emit_insn (gen_zero_extendsidi2_1 (operands[0], operands[1])); DONE; } }) -(define_insn "zero_extendsidi2_32" +(define_insn "*zero_extendsidi2_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] + "TARGET_64BIT" + "@ + mov\t{%k1, %k0|%k0, %k1} + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "prefix_0f" "0,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (zero_extend:DI (match_dup 0)))] + "TARGET_64BIT" + [(set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +;; %%% Kill me once multi-word ops are sane. 
+(define_insn "zero_extendsidi2_1" [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) @@ -4098,30 +3643,6 @@ (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex") (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")]) -(define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") - (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] - "TARGET_64BIT" - "@ - mov\t{%k1, %k0|%k0, %k1} - # - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - %vmovd\t{%1, %0|%0, %1} - %vmovd\t{%1, %0|%0, %1}" - [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") - (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") - (set_attr "prefix_0f" "0,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) - -(define_split - [(set (match_operand:DI 0 "memory_operand" "") - (zero_extend:DI (match_dup 0)))] - "TARGET_64BIT" - [(set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") - (define_split [(set (match_operand:DI 0 "register_operand" "") (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) @@ -4129,59 +3650,149 @@ "!TARGET_64BIT && reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" [(set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && reload_completed - && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])" + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") - -(define_insn "zero_extendhidi2" + 
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extend<mode>di2" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + (zero_extend:DI + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))] "TARGET_64BIT" - "movz{wl|x}\t{%1, %k0|%k0, %1}" + "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + { + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "zero_extendhisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendhisi2_movzwl" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)" + "movz{wl|x}\t{%1, %0|%0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) -(define_insn "zero_extendqidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))] - "TARGET_64BIT" +(define_expand "zero_extendqi<mode>2" + [(parallel + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 
FLAGS_REG))])]) + +(define_insn "*zero_extendqi<mode>2_and" + [(set (match_operand:SWI24 0 "register_operand" "=r,?&q") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>")]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + && ANY_QI_REG_P (operands[0]) + && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (strict_low_part (match_dup 2)) (match_dup 1))] +{ + operands[2] = gen_lowpart (QImode, operands[0]); + ix86_expand_clear (operands[0]); +}) + +(define_insn "*zero_extendqi<mode>2_movzbl_and" + [(set (match_operand:SWI24 0 "register_operand" "=r,r") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "<MODE>")]) + +;; For the movzbl case strip only the clobber +(define_split + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_dup 0) + (zero_extend:SWI24 (match_dup 1)))]) + +; zero extend to SImode to avoid partial register stalls +(define_insn "*zero_extendqi<mode>2_movzbl" + [(set (match_operand:SWI24 0 "register_operand" "=r") + 
(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))" "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) + +;; Rest is handled by single and. +(define_split + [(set (match_operand:SWI24 0 "register_operand" "") + (zero_extend:SWI24 (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:SWI24 (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])]) ;; Sign extension instructions (define_expand "extendsidi2" - [(parallel [(set (match_operand:DI 0 "register_operand" "") - (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_scratch:SI 2 ""))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1])); + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" "")))] + "" +{ + if (!TARGET_64BIT) + { + emit_insn (gen_extendsidi2_1 (operands[0], operands[1])); DONE; } }) -(define_insn "*extendsidi2_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") - (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_scratch:SI 2 "=X,X,X,&r"))] - "!TARGET_64BIT" - "#") - -(define_insn "extendsidi2_rex64" +(define_insn "*extendsidi2_rex64" [(set (match_operand:DI 0 "register_operand" "=*a,r") (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))] "TARGET_64BIT" @@ -4193,21 +3804,13 @@ (set_attr "prefix_0f" "0") (set_attr "modrm" "0,1")]) -(define_insn "extendhidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] - "TARGET_64BIT" - "movs{wq|x}\t{%1, %0|%0, %1}" - [(set_attr "type" 
"imovx") - (set_attr "mode" "DI")]) - -(define_insn "extendqidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "TARGET_64BIT" - "movs{bq|x}\t{%1, %0|%0, %1}" - [(set_attr "type" "imovx") - (set_attr "mode" "DI")]) +(define_insn "extendsidi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 "=X,X,X,&r"))] + "!TARGET_64BIT" + "#") ;; Extend to memory case when source register does die. (define_split @@ -4222,7 +3825,7 @@ (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 4) (match_dup 1))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") ;; Extend to memory case when source register does not die. (define_split @@ -4233,7 +3836,7 @@ "reload_completed" [(const_int 0)] { - split_di (&operands[0], 1, &operands[3], &operands[4]); + split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); emit_move_insn (operands[3], operands[1]); @@ -4263,7 +3866,7 @@ "reload_completed" [(const_int 0)] { - split_di (&operands[0], 1, &operands[3], &operands[4]); + split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); if (true_regnum (operands[3]) != true_regnum (operands[1])) emit_move_insn (operands[3], operands[1]); @@ -4284,6 +3887,15 @@ DONE; }) +(define_insn "extend<mode>di2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))] + "TARGET_64BIT" + "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + (define_insn "extendhisi2" [(set (match_operand:SI 0 "register_operand" "=*a,r") (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" 
"*0,rm")))] @@ -4313,7 +3925,8 @@ (define_insn "*extendhisi2_zext" [(set (match_operand:DI 0 "register_operand" "=*a,r") (zero_extend:DI - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] + (sign_extend:SI + (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] "TARGET_64BIT" { switch (get_attr_prefix_0f (insn)) @@ -4337,32 +3950,6 @@ (const_string "0") (const_string "1")))]) -(define_insn "extendqihi2" - [(set (match_operand:HI 0 "register_operand" "=*a,r") - (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] - "" -{ - switch (get_attr_prefix_0f (insn)) - { - case 0: - return "{cbtw|cbw}"; - default: - return "movs{bw|x}\t{%1, %0|%0, %1}"; - } -} - [(set_attr "type" "imovx") - (set_attr "mode" "HI") - (set (attr "prefix_0f") - ;; movsx is short decodable while cwtl is vector decoded. - (if_then_else (and (eq_attr "cpu" "!k6") - (eq_attr "alternative" "0")) - (const_string "0") - (const_string "1"))) - (set (attr "modrm") - (if_then_else (eq_attr "prefix_0f" "0") - (const_string "0") - (const_string "1")))]) - (define_insn "extendqisi2" [(set (match_operand:SI 0 "register_operand" "=r") (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] @@ -4379,47 +3966,53 @@ "movs{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=*a,r") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cbtw|cbw}"; + default: + return "movs{bw|x}\t{%1, %0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "HI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. 
+ (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) ;; Conversions between float and double. -;; These are all no-ops in the model used for the 80387. So just -;; emit moves. +;; These are all no-ops in the model used for the 80387. +;; So just emit moves. ;; %%% Kill these when call knows how to work out a DFmode push earlier. -(define_insn "*dummy_extendsfdf2" - [(set (match_operand:DF 0 "push_operand" "=<") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))] - "0" - "#") - (define_split [(set (match_operand:DF 0 "push_operand" "") (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] - "" + "reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))]) -(define_insn "*dummy_extendsfxf2" - [(set (match_operand:XF 0 "push_operand" "=<") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))] - "0" - "#") - (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] - "" + (float_extend:XF (match_operand:MODEF 1 "fp_register_operand" "")))] + "reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))] - "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") - -(define_split - [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] - "" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) - (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))] - "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? 
-16 : -12);") + "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") (define_expand "extendsfdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -4634,8 +4227,7 @@ (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0 "" "") (float_truncate:SF (match_operand:DF 1 "" ""))) - (clobber (match_operand:SF 2 "" ""))])] - "") + (clobber (match_operand:SF 2 "" ""))])]) (define_insn "*truncdfsf_fast_mixed" [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x") @@ -4740,9 +4332,7 @@ "reload_completed" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] -{ - operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1])); -}) + "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));") ;; Conversion from XFmode to {SF,DF}mode @@ -4763,9 +4353,9 @@ } else { - enum ix86_stack_slot slot = (virtuals_instantiated - ? SLOT_TEMP - : SLOT_VIRTUAL); + enum ix86_stack_slot slot = (virtuals_instantiated + ? SLOT_TEMP + : SLOT_VIRTUAL); operands[2] = assign_386_stack_local (<MODE>mode, slot); } }) @@ -4823,8 +4413,7 @@ (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_split [(set (match_operand:MODEF 0 "memory_operand" "") @@ -4832,8 +4421,7 @@ (match_operand:XF 1 "register_operand" ""))) (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387" - [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]) ;; Signed conversion to DImode. 
@@ -4947,7 +4535,7 @@ real_ldexp (&TWO31r, &dconst1, 31); two31 = const_double_from_real_value (TWO31r, mode); - two31 = ix86_build_const_vector (mode, true, two31); + two31 = ix86_build_const_vector (vecmode, true, two31); operands[2] = force_reg (vecmode, two31); }) @@ -4992,7 +4580,8 @@ (set_attr "prefix_rex" "1") (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) + (set_attr "amdfam10_decode" "double,double") + (set_attr "bdver1_decode" "double,double")]) (define_insn "fix_trunc<mode>si_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") @@ -5004,7 +4593,8 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) + (set_attr "amdfam10_decode" "double,double") + (set_attr "bdver1_decode" "double,double")]) ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. (define_peephole2 @@ -5013,9 +4603,9 @@ (set (match_operand:SSEMODEI24 2 "register_operand" "") (fix:SSEMODEI24 (match_dup 0)))] "TARGET_SHORTEN_X87_SSE + && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()) && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] - "") + [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]) ;; Avoid vector decoded forms of the instruction. 
(define_peephole2 @@ -5024,8 +4614,7 @@ (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] - "") + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) (define_peephole2 [(match_scratch:SF 2 "x") @@ -5033,8 +4622,7 @@ (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] - "") + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1" [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") @@ -5098,8 +4686,7 @@ "reload_completed" [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) (clobber (match_dup 3))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_split [(set (match_operand:X87MODEI 0 "memory_operand" "") @@ -5108,8 +4695,7 @@ (clobber (match_scratch 3 ""))] "reload_completed" [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) - (clobber (match_dup 3))])] - "") + (clobber (match_dup 3))])]) ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description ;; of the machinery. Please note the clobber of FLAGS_REG. 
In i387 control @@ -5190,8 +4776,7 @@ (use (match_dup 2)) (use (match_dup 3)) (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -5204,8 +4789,7 @@ [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) (use (match_dup 2)) (use (match_dup 3)) - (clobber (match_dup 5))])] - "") + (clobber (match_dup 5))])]) (define_insn "fix_trunc<mode>_i387" [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") @@ -5244,8 +4828,7 @@ [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:X87MODEI12 0 "memory_operand" "") @@ -5256,28 +4839,31 @@ "reload_completed" [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) - (use (match_dup 3))])] - "") + (use (match_dup 3))])]) (define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 "memory_operand" "=m") (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))] "TARGET_80387" "fnstcw\t%0" - [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2")) + [(set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 2")) (set_attr "mode" "HI") - (set_attr "unit" "i387")]) + (set_attr "unit" "i387") + (set_attr "bdver1_decode" "vector")]) (define_insn "x86_fldcw_1" [(set (reg:HI FPCR_REG) (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] "TARGET_80387" "fldcw\t%0" - [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2")) + [(set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 2")) (set_attr "mode" "HI") (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) ;; Conversion between fixed point and floating 
point. @@ -5289,8 +4875,7 @@ (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] "TARGET_80387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387)" - "") + || TARGET_MIX_SSE_I387)") ;; Pre-reload splitter to add memory clobber to the pattern. (define_insn_and_split "*floathi<mode>2_1" @@ -5340,8 +4925,7 @@ || TARGET_MIX_SSE_I387) && reload_completed" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:X87MODEF (match_dup 2)))] - "") + (set (match_dup 0) (float:X87MODEF (match_dup 2)))]) (define_split [(set (match_operand:X87MODEF 0 "register_operand" "") @@ -5351,8 +4935,7 @@ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && reload_completed" - [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) (define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2" [(set (match_operand:X87MODEF 0 "register_operand" "") @@ -5434,6 +5017,7 @@ (set_attr "unit" "*,i387,*,*,*") (set_attr "athlon_decode" "*,*,double,direct,double") (set_attr "amdfam10_decode" "*,*,vector,double,double") + (set_attr "bdver1_decode" "*,*,double,direct,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsi<mode>2_vector_mixed" @@ -5449,6 +5033,7 @@ (set_attr "unit" "i387,*") (set_attr "athlon_decode" "*,direct") (set_attr "amdfam10_decode" "*,double") + (set_attr "bdver1_decode" "*,direct") (set_attr "fp_int_src" "true")]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp" @@ -5464,6 +5049,7 @@ (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") (set_attr "amdfam10_decode" "*,*,vector,double") + (set_attr "bdver1_decode" "*,*,double,direct") (set_attr "fp_int_src" "true")]) (define_split @@ -5477,8 +5063,7 @@ && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && SSE_REG_P (operands[0])))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:MODEF (match_dup 
1)))]) (define_split [(set (match_operand:MODEF 0 "register_operand" "") @@ -5492,8 +5077,7 @@ || (GET_CODE (operands[0]) == SUBREG && SSE_REG_P (operands[0])))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:MODEF (match_dup 2)))] - "") + (set (match_dup 0) (float:MODEF (match_dup 2)))]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit" [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") @@ -5518,6 +5102,7 @@ (set_attr "unit" "i387,*,*") (set_attr "athlon_decode" "*,double,direct") (set_attr "amdfam10_decode" "*,vector,double") + (set_attr "bdver1_decode" "*,double,direct") (set_attr "fp_int_src" "true")]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit" @@ -5541,6 +5126,7 @@ (const_string "*"))) (set_attr "athlon_decode" "*,direct") (set_attr "amdfam10_decode" "*,double") + (set_attr "bdver1_decode" "*,direct") (set_attr "fp_int_src" "true")]) (define_insn "*floatsi<mode>2_vector_sse_with_temp" @@ -5555,6 +5141,7 @@ (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>") (set_attr "athlon_decode" "double,direct,double") (set_attr "amdfam10_decode" "vector,double,double") + (set_attr "bdver1_decode" "double,direct,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsi<mode>2_vector_sse" @@ -5567,6 +5154,7 @@ (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "fp_int_src" "true")]) (define_split @@ -5702,6 +5290,7 @@ (set_attr "mode" "<MODEF:MODE>") (set_attr "athlon_decode" "double,direct") (set_attr "amdfam10_decode" "vector,double") + (set_attr "bdver1_decode" "double,direct") (set_attr "fp_int_src" "true")]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit" @@ -5723,6 +5312,7 @@ (const_string "*"))) (set_attr "athlon_decode" "double,direct") (set_attr "amdfam10_decode" "vector,double") + (set_attr "bdver1_decode" "double,direct") (set_attr "fp_int_src" "true")]) (define_split @@ -5736,8 
+5326,7 @@ && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && SSE_REG_P (operands[0])))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit" [(set (match_operand:MODEF 0 "register_operand" "=x") @@ -5758,6 +5347,7 @@ (const_string "*"))) (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "fp_int_src" "true")]) (define_split @@ -5772,8 +5362,7 @@ || (GET_CODE (operands[0]) == SUBREG && SSE_REG_P (operands[0])))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:MODEF (match_dup 2)))] - "") + (set (match_dup 0) (float:MODEF (match_dup 2)))]) (define_split [(set (match_operand:MODEF 0 "register_operand" "") @@ -5785,8 +5374,7 @@ && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && SSE_REG_P (operands[0])))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp" [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") @@ -5823,8 +5411,7 @@ && reload_completed && FP_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:X87MODEF (match_dup 2)))] - "") + (set (match_dup 0) (float:X87MODEF (match_dup 2)))]) (define_split [(set (match_operand:X87MODEF 0 "register_operand" "") @@ -5834,8 +5421,7 @@ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode) && reload_completed && FP_REG_P (operands[0])" - [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. 
*/ @@ -5893,8 +5479,7 @@ && !TARGET_64BIT && optimize_function_for_speed_p (cfun) && reload_completed && FP_REG_P (operands[0])" - [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] - "") + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two @@ -6023,7 +5608,7 @@ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);") + "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") (define_insn "*add<mode>3_cc" [(set (reg:CC FLAGS_REG) @@ -6052,8 +5637,8 @@ (set_attr "mode" "QI")]) (define_insn "*lea_1" - [(set (match_operand:DWIH 0 "register_operand" "=r") - (match_operand:DWIH 1 "no_seg_address_operand" "p"))] + [(set (match_operand:P 0 "register_operand" "=r") + (match_operand:P 1 "no_seg_address_operand" "p"))] "" "lea{<imodesuffix>}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -6087,8 +5672,7 @@ switch (get_attr_type (insn)) { case TYPE_LEA: - operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}"; + return "#"; case TYPE_INCDEC: gcc_assert (rtx_equal_p (operands[0], operands[1])); @@ -6101,9 +5685,13 @@ } default: - /* Use add as much as possible to replace lea for AGU optimization. */ - if (which_alternative == 2 && TARGET_OPT_AGU) - return "add{<imodesuffix>}\t{%1, %0|%0, %1}"; + /* For most processors, ADD is faster than LEA. This alternative + was added to use ADD as much as possible. 
*/ + if (which_alternative == 2) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) @@ -6113,15 +5701,8 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "2") - (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) - (const_string "lea") - (eq_attr "alternative" "3") + (cond [(eq_attr "alternative" "3") (const_string "lea") - ; Current assemblers are broken and do not allow @GOTOFF in - ; ought but a memory context. - (match_operand:SWI48 2 "pic_symbolic_operand" "") - (const_string "lea") (match_operand:SWI48 2 "incdec_operand" "") (const_string "incdec") ] @@ -6140,18 +5721,17 @@ ;; patterns constructed from addsi_1 to match. (define_insn "*addsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") - (match_operand:SI 2 "general_operand" "g,li")))) + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r") + (match_operand:SI 2 "general_operand" "g,0,li")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: - operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); - return "lea{l}\t{%a2, %k0|%k0, %a2}"; + return "#"; case TYPE_INCDEC: if (operands[2] == const1_rtx) @@ -6163,6 +5743,14 @@ } default: + /* For most processors, ADD is faster than LEA. This alternative + was added to use ADD as much as possible. 
*/ + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + if (x86_maybe_negate_const_int (&operands[2], SImode)) return "sub{l}\t{%2, %k0|%k0, %2}"; @@ -6170,11 +5758,7 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "1") - (const_string "lea") - ; Current assemblers are broken and do not allow @GOTOFF in - ; ought but a memory context. - (match_operand:SI 2 "pic_symbolic_operand" "") + (cond [(eq_attr "alternative" "2") (const_string "lea") (match_operand:SI 2 "incdec_operand" "") (const_string "incdec") @@ -6224,14 +5808,10 @@ (const_string "*"))) (set_attr "mode" "HI")]) -;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah -;; type optimizations enabled by define-splits. This is not important -;; for PII, and in fact harmful because of partial register stalls. - (define_insn "*addhi_1_lea" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:HI 2 "general_operand" "rn,rm,ln"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:HI 2 "general_operand" "rmn,rn,0,ln"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, HImode, operands)" @@ -6240,7 +5820,9 @@ { case TYPE_LEA: return "#"; + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{w}\t%0"; else @@ -6250,6 +5832,15 @@ } default: + /* For most processors, ADD is faster than LEA. This alternative + was added to use ADD as much as possible. 
*/ + if (which_alternative == 2) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], HImode)) return "sub{w}\t{%2, %0|%0, %2}"; @@ -6257,18 +5848,20 @@ } } [(set (attr "type") - (if_then_else (eq_attr "alternative" "2") - (const_string "lea") - (if_then_else (match_operand:HI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu")))) + (cond [(eq_attr "alternative" "3") + (const_string "lea") + (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) (set (attr "length_immediate") (if_then_else (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI,HI,SI")]) - + (set_attr "mode" "HI,HI,HI,SI")]) + +;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*addqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") @@ -6314,21 +5907,24 @@ (const_string "*"))) (set_attr "mode" "QI,QI,SI")]) -;; %%% Potential partial reg stall on alternative 2. What to do? +;; %%% Potential partial reg stall on alternatives 3 and 4. What to do? 
(define_insn "*addqi_1_lea" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r") - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r") - (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,q,r,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,r") + (match_operand:QI 2 "general_operand" "qmn,qn,0,rn,0,ln"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL && ix86_binary_operator_ok (PLUS, QImode, operands)" { - int widen = (which_alternative == 2); + int widen = (which_alternative == 3 || which_alternative == 4); + switch (get_attr_type (insn)) { case TYPE_LEA: return "#"; + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; else @@ -6338,6 +5934,15 @@ } default: + /* For most processors, ADD is faster than LEA. These alternatives + were added to use ADD as much as possible. */ + if (which_alternative == 2 || which_alternative == 4) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], QImode)) { if (widen) @@ -6352,17 +5957,18 @@ } } [(set (attr "type") - (if_then_else (eq_attr "alternative" "3") - (const_string "lea") - (if_then_else (match_operand:QI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu")))) + (cond [(eq_attr "alternative" "5") + (const_string "lea") + (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) (set (attr "length_immediate") (if_then_else (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) (const_string "1") (const_string "*"))) - (set_attr "mode" "QI,QI,SI,SI")]) + (set_attr "mode" "QI,QI,QI,SI,SI,SI")]) (define_insn "*addqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -6400,25 
+6006,82 @@ (const_string "none"))) (set_attr "mode" "QI")]) +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (plus (match_operand 1 "register_operand" "") + (match_operand 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(const_int 0)] +{ + rtx pat; + enum machine_mode mode = GET_MODE (operands[0]); + + /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) + may confuse gen_lowpart. */ + if (mode != Pmode) + { + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + } + + pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); + + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); + + if (TARGET_64BIT && mode != Pmode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; Convert lea to the lea pattern to avoid flags dependency. +;; ??? This pattern handles immediate operands that do not satisfy immediate +;; operand predicate (LEGITIMATE_CONSTANT_P) in the previous pattern. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (plus:DI (match_dup 1) (match_dup 2)))]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
+(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_lowpart (DImode, operands[2]); +}) + (define_insn "*add<mode>_2" [(set (reg FLAGS_REG) (compare - (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")) + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>")) (const_int 0))) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm") - (plus:SWI48 (match_dup 1) (match_dup 2)))] + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m") + (plus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, <MODE>mode, operands) - /* Current assemblers are broken and do not allow @GOTOFF in - ought but a memory context. */ - && ! pic_symbolic_operand (operands[2], VOIDmode)" + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" { switch (get_attr_type (insn)) { case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{<imodesuffix>}\t%0"; else @@ -6428,9 +6091,6 @@ } default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - /* ???? In DImode, we ought to handle there the 32bit case too - - do we need new constraint? 
*/ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; @@ -6438,7 +6098,7 @@ } } [(set (attr "type") - (if_then_else (match_operand:SWI48 2 "incdec_operand" "") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") @@ -6458,10 +6118,7 @@ (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, SImode, operands) - /* Current assemblers are broken and do not allow @GOTOFF in - ought but a memory context. */ - && ! pic_symbolic_operand (operands[2], VOIDmode)" + && ix86_binary_operator_ok (PLUS, SImode, operands)" { switch (get_attr_type (insn)) { @@ -6492,100 +6149,19 @@ (const_string "*"))) (set_attr "mode" "SI")]) -(define_insn "*addhi_2" +(define_insn "*add<mode>_3" [(set (reg FLAGS_REG) (compare - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmn,rn")) - (const_int 0))) - (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") - (plus:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, HImode, operands)" + (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>")) + (match_operand:SWI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SWI 0 "=<r>"))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{w}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx); - return "dec{w}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], HImode)) - return "sub{w}\t{%2, %0|%0, %2}"; - - return "add{w}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:HI 2 "incdec_operand" "") - (const_string "incdec") - 
(const_string "alu"))) - (set (attr "length_immediate") - (if_then_else - (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) - (const_string "1") - (const_string "*"))) - (set_attr "mode" "HI")]) - -(define_insn "*addqi_2" - [(set (reg FLAGS_REG) - (compare - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qmn,qn")) - (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") - (plus:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, QImode, operands)" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == const1_rtx) - return "inc{b}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)); - return "dec{b}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], QImode)) - return "sub{b}\t{%2, %0|%0, %2}"; - - return "add{b}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:QI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set_attr "mode" "QI")]) - -(define_insn "*add<mode>_3" - [(set (reg FLAGS_REG) - (compare - (neg:SWI48 (match_operand:SWI48 2 "<general_operand>" "<g>")) - (match_operand:SWI48 1 "nonimmediate_operand" "%0"))) - (clobber (match_scratch:SWI48 0 "=r"))] - "ix86_match_ccmode (insn, CCZmode) - && !(MEM_P (operands[1]) && MEM_P (operands[2])) - /* Current assemblers are broken and do not allow @GOTOFF in - ought but a memory context. */ - && ! pic_symbolic_operand (operands[2], VOIDmode)" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - if (operands[2] == const1_rtx) return "inc{<imodesuffix>}\t%0"; else { @@ -6594,9 +6170,6 @@ } default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - /* ???? 
In DImode, we ought to handle there the 32bit case too - - do we need new constraint? */ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; @@ -6604,7 +6177,7 @@ } } [(set (attr "type") - (if_then_else (match_operand:SWI48 2 "incdec_operand" "") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") @@ -6623,10 +6196,7 @@ (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) - && ix86_binary_operator_ok (PLUS, SImode, operands) - /* Current assemblers are broken and do not allow @GOTOFF in - ought but a memory context. */ - && ! pic_symbolic_operand (operands[2], VOIDmode)" + && ix86_binary_operator_ok (PLUS, SImode, operands)" { switch (get_attr_type (insn)) { @@ -6657,79 +6227,6 @@ (const_string "*"))) (set_attr "mode" "SI")]) -(define_insn "*addhi_3" - [(set (reg FLAGS_REG) - (compare - (neg:HI (match_operand:HI 2 "general_operand" "rmn")) - (match_operand:HI 1 "nonimmediate_operand" "%0"))) - (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCZmode) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == const1_rtx) - return "inc{w}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx); - return "dec{w}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], HImode)) - return "sub{w}\t{%2, %0|%0, %2}"; - - return "add{w}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:HI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set (attr "length_immediate") - (if_then_else - (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) - (const_string "1") - (const_string "*"))) - (set_attr "mode" "HI")]) - -(define_insn "*addqi_3" - [(set (reg FLAGS_REG) - 
(compare - (neg:QI (match_operand:QI 2 "general_operand" "qmn")) - (match_operand:QI 1 "nonimmediate_operand" "%0"))) - (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCZmode) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == const1_rtx) - return "inc{b}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)); - return "dec{b}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], QImode)) - return "sub{b}\t{%2, %0|%0, %2}"; - - return "add{b}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:QI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set_attr "mode" "QI")]) - ; For comparisons against 1, -1 and 128, we may generate better code ; by converting cmp to add, inc or dec as done by peephole2. This pattern ; is matched then. We can't accept general immediate, because for @@ -6758,7 +6255,6 @@ } default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], DImode)) return "add{q}\t{%2, %0|%0, %2}"; @@ -6783,75 +6279,34 @@ ; Also carry flag is reversed compared to cmp, so this conversion is valid ; only for comparisons not depending on it. 
-(define_insn "*addsi_4" +(define_insn "*add<mode>_4" [(set (reg FLAGS_REG) (compare - (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "const_int_operand" "n"))) - (clobber (match_scratch:SI 0 "=rm"))] + (match_operand:SWI124 1 "nonimmediate_operand" "0") + (match_operand:SWI124 2 "const_int_operand" "n"))) + (clobber (match_scratch:SWI124 0 "=<r>m"))] "ix86_match_ccmode (insn, CCGCmode)" { switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == constm1_rtx) - return "inc{l}\t%0"; + return "inc{<imodesuffix>}\t%0"; else { gcc_assert (operands[2] == const1_rtx); - return "dec{l}\t%0"; + return "dec{<imodesuffix>}\t%0"; } default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - if (x86_maybe_negate_const_int (&operands[2], SImode)) - return "add{l}\t{%2, %0|%0, %2}"; - - return "sub{l}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:SI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set (attr "length_immediate") - (if_then_else - (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) - (const_string "1") - (const_string "*"))) - (set_attr "mode" "SI")]) - -; See comments above addsi_4 for details. 
- -(define_insn "*addhi_4" - [(set (reg FLAGS_REG) - (compare - (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:HI 2 "const_int_operand" "n"))) - (clobber (match_scratch:HI 0 "=rm"))] - "ix86_match_ccmode (insn, CCGCmode)" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == constm1_rtx) - return "inc{w}\t%0"; - else - { - gcc_assert (operands[2] == const1_rtx); - return "dec{w}\t%0"; - } - - default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - if (x86_maybe_negate_const_int (&operands[2], HImode)) - return "add{w}\t{%2, %0|%0, %2}"; - - return "sub{w}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:HI 2 "incdec_operand" "") + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:<MODE> 2 "incdec_operand" "") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") @@ -6859,63 +6314,22 @@ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI")]) - -; See comments above addsi_4 for details. 
- -(define_insn "*addqi_4" + (set_attr "mode" "<MODE>")]) + +(define_insn "*add<mode>_5" [(set (reg FLAGS_REG) (compare - (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_operand" "n"))) - (clobber (match_scratch:QI 0 "=qm"))] - "ix86_match_ccmode (insn, CCGCmode)" + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "<general_operand>" "<g>")) + (const_int 0))) + (clobber (match_scratch:SWI 0 "=<r>"))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)) - return "inc{b}\t%0"; - else - { - gcc_assert (operands[2] == const1_rtx); - return "dec{b}\t%0"; - } - - default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); - if (x86_maybe_negate_const_int (&operands[2], QImode)) - return "add{b}\t{%2, %0|%0, %2}"; - - return "sub{b}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:HI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set_attr "mode" "QI")]) - -(define_insn "*add<mode>_5" - [(set (reg FLAGS_REG) - (compare - (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0") - (match_operand:SWI48 2 "<general_operand>" "<g>")) - (const_int 0))) - (clobber (match_scratch:SWI48 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && !(MEM_P (operands[1]) && MEM_P (operands[2])) - /* Current assemblers are broken and do not allow @GOTOFF in - ought but a memory context. */ - && ! 
pic_symbolic_operand (operands[2], VOIDmode)" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{<imodesuffix>}\t%0"; else @@ -6925,7 +6339,6 @@ } default: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; @@ -6933,7 +6346,7 @@ } } [(set (attr "type") - (if_then_else (match_operand:SWI48 2 "incdec_operand" "") + (if_then_else (match_operand:SWI 2 "incdec_operand" "") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") @@ -6943,81 +6356,6 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "*addhi_5" - [(set (reg FLAGS_REG) - (compare - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rmn")) - (const_int 0))) - (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == const1_rtx) - return "inc{w}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx); - return "dec{w}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], HImode)) - return "sub{w}\t{%2, %0|%0, %2}"; - - return "add{w}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:HI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set (attr "length_immediate") - (if_then_else - (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" "")) - (const_string "1") - (const_string "*"))) - (set_attr "mode" "HI")]) - -(define_insn "*addqi_5" - [(set (reg FLAGS_REG) - (compare - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "general_operand" "qmn")) - (const_int 0))) - (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && !(MEM_P 
(operands[1]) && MEM_P (operands[2]))" -{ - switch (get_attr_type (insn)) - { - case TYPE_INCDEC: - if (operands[2] == const1_rtx) - return "inc{b}\t%0"; - else - { - gcc_assert (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)); - return "dec{b}\t%0"; - } - - default: - if (x86_maybe_negate_const_int (&operands[2], QImode)) - return "sub{b}\t{%2, %0|%0, %2}"; - - return "add{b}\t{%2, %0|%0, %2}"; - } -} - [(set (attr "type") - (if_then_else (match_operand:QI 2 "incdec_operand" "") - (const_string "incdec") - (const_string "alu"))) - (set_attr "mode" "QI")]) - (define_insn "*addqi_ext_1_rex64" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) @@ -7038,9 +6376,7 @@ return "inc{b}\t%h0"; else { - gcc_assert (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)); + gcc_assert (operands[2] == constm1_rtx); return "dec{b}\t%h0"; } @@ -7075,9 +6411,7 @@ return "inc{b}\t%h0"; else { - gcc_assert (operands[2] == constm1_rtx - || (CONST_INT_P (operands[2]) - && INTVAL (operands[2]) == 255)); + gcc_assert (operands[2] == constm1_rtx); return "dec{b}\t%h0"; } @@ -7274,60 +6608,6 @@ } [(set_attr "type" "lea") (set_attr "mode" "SI")]) - -;; Convert lea to the lea pattern to avoid flags dependency. -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (plus:DI (match_operand:DI 1 "register_operand" "") - (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && ix86_lea_for_add_ok (PLUS, insn, operands)" - [(set (match_dup 0) - (plus:DI (match_dup 1) - (match_dup 2)))] - "") - -;; Convert lea to the lea pattern to avoid flags dependency. 
-(define_split - [(set (match_operand 0 "register_operand" "") - (plus (match_operand 1 "register_operand" "") - (match_operand 2 "nonmemory_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" - [(const_int 0)] -{ - rtx pat; - /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) - may confuse gen_lowpart. */ - if (GET_MODE (operands[0]) != Pmode) - { - operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = gen_lowpart (Pmode, operands[2]); - } - operands[0] = gen_lowpart (SImode, operands[0]); - pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); - if (Pmode != SImode) - pat = gen_rtx_SUBREG (SImode, pat, 0); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); - DONE; -}) - -;; Convert lea to the lea pattern to avoid flags dependency. -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI - (plus:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" - [(set (match_dup 0) - (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] -{ - operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = gen_lowpart (Pmode, operands[2]); -}) ;; Subtract instructions @@ -7358,7 +6638,7 @@ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);") + "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") (define_insn "*sub<mode>_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") @@ -7462,8 +6742,7 @@ (const_int 0)]) (match_operand:SWI 2 "<general_operand>" "")))) (clobber (reg:CC FLAGS_REG))])] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "") + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)") (define_insn 
"*<plusminus_insn><mode>3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") @@ -7517,10 +6796,10 @@ (compare:CCC (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0") - (match_operand:SWI 2 "<general_operand>" "<r><i>m")) + (match_operand:SWI 2 "<general_operand>" "<g>")) (match_dup 1))) (clobber (match_scratch:SWI 0 "=<r>"))] - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "add{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) @@ -7572,8 +6851,7 @@ (plusminus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "TARGET_80387" - "") + "TARGET_80387") (define_expand "<plusminus_insn><mode>3" [(set (match_operand:MODEF 0 "register_operand" "") @@ -7581,8 +6859,7 @@ (match_operand:MODEF 1 "register_operand" "") (match_operand:MODEF 2 "nonimmediate_operand" "")))] "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" - "") + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") ;; Multiply instructions @@ -7591,9 +6868,7 @@ (mult:SWIM248 (match_operand:SWIM248 1 "register_operand" "") (match_operand:SWIM248 2 "<general_operand>" ""))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") + (clobber (reg:CC FLAGS_REG))])]) (define_expand "mulqi3" [(parallel [(set (match_operand:QI 0 "register_operand" "") @@ -7601,8 +6876,7 @@ (match_operand:QI 1 "register_operand" "") (match_operand:QI 2 "nonimmediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_QIMODE_MATH" - "") + "TARGET_QIMODE_MATH") ;; On AMDFAM10 ;; IMUL reg32/64, reg32/64, imm8 Direct @@ -7611,6 +6885,8 @@ ;; IMUL reg32/64, mem32/64, imm32 VectorPath ;; IMUL reg32/64, reg32/64 Direct ;; IMUL reg32/64, mem32/64 Direct +;; +;; On BDVER1, all above IMULs use DirectPath (define_insn "*mul<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r,r,r") @@ -7639,6 +6915,7 @@ (match_operand 1 
"memory_operand" "")) (const_string "vector")] (const_string "direct"))) + (set_attr "bdver1_decode" "direct") (set_attr "mode" "<MODE>")]) (define_insn "*mulsi3_1_zext" @@ -7669,6 +6946,7 @@ (match_operand 1 "memory_operand" "")) (const_string "vector")] (const_string "direct"))) + (set_attr "bdver1_decode" "direct") (set_attr "mode" "SI")]) ;; On AMDFAM10 @@ -7678,6 +6956,8 @@ ;; IMUL reg16, mem16, imm16 VectorPath ;; IMUL reg16, reg16 Direct ;; IMUL reg16, mem16 Direct +;; +;; On BDVER1, all HI MULs use DoublePath (define_insn "*mulhi3_1" [(set (match_operand:HI 0 "register_operand" "=r,r,r") @@ -7702,9 +6982,10 @@ (cond [(eq_attr "alternative" "0,1") (const_string "vector")] (const_string "direct"))) + (set_attr "bdver1_decode" "double") (set_attr "mode" "HI")]) -;;On AMDFAM10 +;;On AMDFAM10 and BDVER1 ;; MUL reg8 Direct ;; MUL mem8 Direct @@ -7723,6 +7004,7 @@ (const_string "vector") (const_string "direct"))) (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "QI")]) (define_expand "<u>mul<mode><dwi>3" @@ -7732,9 +7014,7 @@ (match_operand:DWIH 1 "nonimmediate_operand" "")) (any_extend:<DWI> (match_operand:DWIH 2 "register_operand" "")))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") + (clobber (reg:CC FLAGS_REG))])]) (define_expand "<u>mulqihi3" [(parallel [(set (match_operand:HI 0 "register_operand" "") @@ -7744,8 +7024,7 @@ (any_extend:HI (match_operand:QI 2 "register_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_QIMODE_MATH" - "") + "TARGET_QIMODE_MATH") (define_insn "*<u>mul<mode><dwi>3_1" [(set (match_operand:<DWI> 0 "register_operand" "=A") @@ -7764,6 +7043,7 @@ (const_string "vector") (const_string "double"))) (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "<MODE>")]) (define_insn "*<u>mulqihi3_1" @@ -7784,6 +7064,7 @@ (const_string "vector") (const_string "direct"))) (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") (set_attr 
"mode" "QI")]) (define_expand "<s>mul<mode>3_highpart" @@ -7823,6 +7104,7 @@ (const_string "vector") (const_string "double"))) (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "DI")]) (define_insn "*<s>mulsi3_highpart_1" @@ -7846,6 +7128,7 @@ (const_string "vector") (const_string "double"))) (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "SI")]) (define_insn "*<s>mulsi3_highpart_zext" @@ -7869,6 +7152,7 @@ (const_string "vector") (const_string "double"))) (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") (set_attr "mode" "SI")]) ;; The patterns that match these are at the end of this file. @@ -7877,46 +7161,31 @@ [(set (match_operand:XF 0 "register_operand" "") (mult:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "TARGET_80387" - "") + "TARGET_80387") (define_expand "mul<mode>3" [(set (match_operand:MODEF 0 "register_operand" "") (mult:MODEF (match_operand:MODEF 1 "register_operand" "") (match_operand:MODEF 2 "nonimmediate_operand" "")))] "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" - "") + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") ;; Divide instructions -(define_insn "<u>divqi3" - [(set (match_operand:QI 0 "register_operand" "=a") - (any_div:QI - (match_operand:HI 1 "register_operand" "0") - (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_QIMODE_MATH" - "<sgnprefix>div{b}\t%2" - [(set_attr "type" "idiv") - (set_attr "mode" "QI")]) - ;; The patterns that match these are at the end of this file. 
(define_expand "divxf3" [(set (match_operand:XF 0 "register_operand" "") (div:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "TARGET_80387" - "") + "TARGET_80387") (define_expand "divdf3" [(set (match_operand:DF 0 "register_operand" "") (div:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] "(TARGET_80387 && X87_ENABLE_ARITH (DFmode)) - || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") + || (TARGET_SSE2 && TARGET_SSE_MATH)") (define_expand "divsf3" [(set (match_operand:SF 0 "register_operand" "") @@ -7944,9 +7213,61 @@ (match_operand:SWIM248 2 "nonimmediate_operand" ""))) (set (match_operand:SWIM248 3 "register_operand" "") (mod:SWIM248 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") + (clobber (reg:CC FLAGS_REG))])]) + +;; Split with 8bit unsigned divide: +;; if (dividend and divisor are in [0-255]) +;; use 8bit unsigned integer divide +;; else +;; use original integer divide +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (div:SWI48 (match_operand:SWI48 2 "register_operand" "") + (match_operand:SWI48 3 "nonimmediate_operand" ""))) + (set (match_operand:SWI48 1 "register_operand" "") + (mod:SWI48 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (<MODE>mode, operands, true); DONE;") + +(define_insn_and_split "divmod<mode>4_1" + [(set (match_operand:SWI48 0 "register_operand" "=a") + (div:SWI48 (match_operand:SWI48 2 "register_operand" "0") + (match_operand:SWI48 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWI48 1 "register_operand" "=&d") + (mod:SWI48 (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SWI48 (match_dup 4) 
(match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:SWI48 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (mod:SWI48 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. */ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) (define_insn_and_split "*divmod<mode>4" [(set (match_operand:SWIM248 0 "register_operand" "=a") @@ -7996,6 +7317,68 @@ [(set_attr "type" "idiv") (set_attr "mode" "<MODE>")]) +(define_expand "divmodqi4" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (div:QI + (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "nonimmediate_operand" ""))) + (set (match_operand:QI 3 "register_operand" "") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, mod, insn; + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (HImode); + tmp1 = gen_reg_rtx (HImode); + + /* Extend operands[1] to HImode. Generate 8bit divide. Result is + in AX. */ + emit_insn (gen_extendqihi2 (tmp1, operands[1])); + emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2])); + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8)); + insn = emit_move_insn (operands[3], tmp1); + + mod = gen_rtx_MOD (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Extract quotient from AL. 
*/ + insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0)); + + div = gen_rtx_DIV (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, div); + + DONE; +}) + +;; Divide AX by r/m8, with result stored in +;; AL <- Quotient +;; AH <- Remainder +;; Change div/mod to HImode and extend the second argument to HImode +;; so that mode of div/mod matches with mode of arguments. Otherwise +;; combine may fail. +(define_insn "divmodhiqi3" + [(set (match_operand:HI 0 "register_operand" "=a") + (ior:HI + (ashift:HI + (zero_extend:HI + (truncate:QI + (mod:HI (match_operand:HI 1 "register_operand" "0") + (sign_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm"))))) + (const_int 8)) + (zero_extend:HI + (truncate:QI + (div:HI (match_dup 1) (sign_extend:HI (match_dup 2))))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "idiv{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + (define_expand "udivmod<mode>4" [(parallel [(set (match_operand:SWIM248 0 "register_operand" "") (udiv:SWIM248 @@ -8003,9 +7386,48 @@ (match_operand:SWIM248 2 "nonimmediate_operand" ""))) (set (match_operand:SWIM248 3 "register_operand" "") (umod:SWIM248 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") + (clobber (reg:CC FLAGS_REG))])]) + +;; Split with 8bit unsigned divide: +;; if (dividend and divisor are in [0-255]) +;; use 8bit unsigned integer divide +;; else +;; use original integer divide +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "") + (match_operand:SWI48 3 "nonimmediate_operand" ""))) + (set (match_operand:SWI48 1 "register_operand" "") + (umod:SWI48 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (<MODE>mode, operands, false); DONE;") + +(define_insn_and_split 
"udivmod<mode>4_1" + [(set (match_operand:SWI48 0 "register_operand" "=a") + (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0") + (match_operand:SWI48 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWI48 1 "register_operand" "=&d") + (umod:SWI48 (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:SWI48 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (umod:SWI48 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) (define_insn_and_split "*udivmod<mode>4" [(set (match_operand:SWIM248 0 "register_operand" "=a") @@ -8041,6 +7463,63 @@ [(set_attr "type" "idiv") (set_attr "mode" "<MODE>")]) +(define_expand "udivmodqi4" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (udiv:QI + (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "nonimmediate_operand" ""))) + (set (match_operand:QI 3 "register_operand" "") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, mod, insn; + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (HImode); + tmp1 = gen_reg_rtx (HImode); + + /* Extend operands[1] to HImode. Generate 8bit divide. Result is + in AX. */ + emit_insn (gen_zero_extendqihi2 (tmp1, operands[1])); + emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2])); + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8)); + tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0); + insn = emit_move_insn (operands[3], tmp1); + + mod = gen_rtx_UMOD (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Extract quotient from AL. 
*/ + insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0)); + + div = gen_rtx_UDIV (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, div); + + DONE; +}) + +(define_insn "udivmodhiqi3" + [(set (match_operand:HI 0 "register_operand" "=a") + (ior:HI + (ashift:HI + (zero_extend:HI + (truncate:QI + (mod:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm"))))) + (const_int 8)) + (zero_extend:HI + (truncate:QI + (div:HI (match_dup 1) (zero_extend:HI (match_dup 2))))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + ;; We cannot use div/idiv for double division, because it causes ;; "division by zero" on the overflow and that's not what we expect ;; from truncate. Because true (non truncating) double division is @@ -8070,17 +7549,21 @@ (compare:CCNO (and:SI (match_operand:SI 0 "nonimmediate_operand" "") (match_operand:SI 1 "nonmemory_operand" "")) - (const_int 0)))] - "" - "") + (const_int 0)))]) (define_expand "testqi_ccz_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") (match_operand:QI 1 "nonmemory_operand" "")) - (const_int 0)))] - "" - "") + (const_int 0)))]) + +(define_expand "testdi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:DI (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "x86_64_szext_general_operand" "")) + (const_int 0)))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))") (define_insn "*testdi_1" [(set (reg FLAGS_REG) @@ -8150,9 +7633,7 @@ (const_int 8) (const_int 8)) (match_operand 1 "const_int_operand" "")) - (const_int 0)))] - "" - "") + (const_int 0)))]) (define_insn "*testqi_ext_0" [(set (reg FLAGS_REG) @@ -8944,7 +8425,7 @@ (const_int 0))) (clobber (match_scratch:SWI 0 "=<r>"))] "ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + 
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))" "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) @@ -9082,9 +8563,7 @@ (match_dup 1) (const_int 8) (const_int 8)) - (match_dup 2)))])] - "" - "") + (match_dup 2)))])]) (define_insn "*xorqi_cc_ext_1_rex64" [(set (reg FLAGS_REG) @@ -9165,7 +8644,7 @@ [(set (match_dup 2) (neg:DWIH (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 2, &operands[0], &operands[2]);") + "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);") (define_insn "*neg<mode>2_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") @@ -9450,10 +8929,7 @@ (match_operand:CSGNMODE 2 "register_operand" "")] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || (TARGET_SSE2 && (<MODE>mode == TFmode))" -{ - ix86_expand_copysign (operands); - DONE; -}) + "ix86_expand_copysign (operands); DONE;") (define_insn_and_split "copysign<mode>3_const" [(set (match_operand:CSGNMODE 0 "register_operand" "=x") @@ -9467,10 +8943,7 @@ "#" "&& reload_completed" [(const_int 0)] -{ - ix86_split_copysign_const (operands); - DONE; -}) + "ix86_split_copysign_const (operands); DONE;") (define_insn "copysign<mode>3_var" [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") @@ -9498,10 +8971,7 @@ || (TARGET_SSE2 && (<MODE>mode == TFmode))) && reload_completed" [(const_int 0)] -{ - ix86_split_copysign_var (operands); - DONE; -}) + "ix86_split_copysign_var (operands); DONE;") ;; One complement instructions @@ -9564,8 +9034,7 @@ (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1)) (const_int 0)])) (set (match_dup 1) - (xor:SWI (match_dup 3) (const_int -1)))])] - "") + (xor:SWI (match_dup 3) (const_int -1)))])]) ;; ??? Currently never generated - xor is used instead. 
(define_insn "*one_cmplsi2_2_zext" @@ -9592,8 +9061,7 @@ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) (const_int 0)])) (set (match_dup 1) - (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])] - "") + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) ;; Shift instructions @@ -9673,7 +9141,8 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) (define_insn "x86_shld" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") @@ -9689,7 +9158,8 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) (define_expand "x86_shift<mode>_adj_1" [(set (reg:CCZ FLAGS_REG) @@ -9736,6 +9206,33 @@ DONE; }) +;; Avoid useless masking of count operand. +(define_insn_and_split "*ashl<mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (ashift:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0") + (subreg:QI + (and:SI + (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 3 "const_int_operand" "n")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (ashift:SWI48 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (can_create_pseudo_p ()) + operands [2] = force_reg (SImode, operands[2]); + + operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); +} + [(set_attr "type" "ishift") + (set_attr "mode" "<MODE>")]) + (define_insn "*ashl<mode>3_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l") @@ -10074,78 
+9571,47 @@ ;; Convert lea to the lea pattern to avoid flags dependency. (define_split - [(set (match_operand:DI 0 "register_operand" "") - (ashift:DI (match_operand:DI 1 "index_register_operand" "") - (match_operand:QI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" - [(set (match_dup 0) - (mult:DI (match_dup 1) - (match_dup 2)))] - "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") - -;; Convert lea to the lea pattern to avoid flags dependency. -(define_split [(set (match_operand 0 "register_operand" "") (ashift (match_operand 1 "index_register_operand" "") (match_operand:QI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))] "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1]) - && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4" + && true_regnum (operands[0]) != true_regnum (operands[1])" [(const_int 0)] { rtx pat; enum machine_mode mode = GET_MODE (operands[0]); - if (GET_MODE_SIZE (mode) < 4) - operands[0] = gen_lowpart (SImode, operands[0]); if (mode != Pmode) operands[1] = gen_lowpart (Pmode, operands[1]); operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); - if (Pmode != SImode) + + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); + + if (TARGET_64BIT && mode != Pmode) pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); DONE; }) -;; Rare case of shifting RSP is handled by generating move and shift -(define_split - [(set (match_operand 0 "register_operand" "") - (ashift (match_operand 1 "register_operand" "") - (match_operand:QI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" - [(const_int 0)] -{ - rtx pat, clob; - emit_move_insn (operands[0], operands[1]); - 
pat = gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_ASHIFT (GET_MODE (operands[0]), - operands[0], operands[2])); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob))); - DONE; -}) - ;; Convert lea to the lea pattern to avoid flags dependency. (define_split [(set (match_operand:DI 0 "register_operand" "") (zero_extend:DI - (ashift:SI (match_operand:SI 1 "register_operand" "") + (ashift:SI (match_operand:SI 1 "index_register_operand" "") (match_operand:QI 2 "const_int_operand" "")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) - (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] -{ - operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); + (zero_extend:DI (subreg:SI (mult:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode); }) ;; This pattern can't accept a variable shift count, since shifts by @@ -10252,7 +9718,7 @@ (define_insn "*ashl<mode>3_cconly" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") + (ashift:SWI (match_operand:SWI 1 "register_operand" "0") (match_operand:QI 2 "<shift_immediate_operand>" "<S>")) (const_int 0))) (clobber (match_scratch:SWI 0 "=<r>"))] @@ -10261,8 +9727,7 @@ || (operands[2] == const1_rtx && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) - && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)" + && ix86_match_ccmode (insn, CCGOCmode)" { switch (get_attr_type (insn)) { @@ -10306,6 +9771,33 @@ "" "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") +;; Avoid useless masking of count operand. 
+(define_insn_and_split "*<shiftrt_insn><mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (any_shiftrt:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0") + (subreg:QI + (and:SI + (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 3 "const_int_operand" "n")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (can_create_pseudo_p ()) + operands [2] = force_reg (SImode, operands[2]); + + operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); +} + [(set_attr "type" "ishift") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*<shiftrt_insn><mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=r") (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0") @@ -10347,7 +9839,8 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) (define_insn "x86_shrd" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") @@ -10363,7 +9856,8 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector") + (set_attr "bdver1_decode" "vector")]) (define_insn "ashrdi3_cvt" [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") @@ -10590,7 +10084,7 @@ [(set (reg FLAGS_REG) (compare (any_shiftrt:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operand:SWI 1 "register_operand" "0") (match_operand:QI 2 "<shift_immediate_operand>" "<S>")) (const_int 0))) (clobber (match_scratch:SWI 0 "=<r>"))] @@ -10598,8 +10092,7 @@ || 
!TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) - && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + && ix86_match_ccmode (insn, CCGOCmode)" { if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) @@ -10658,6 +10151,33 @@ "" "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") +;; Avoid useless masking of count operand. +(define_insn_and_split "*<rotate_insn><mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (any_rotate:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0") + (subreg:QI + (and:SI + (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 3 "const_int_operand" "n")) 0))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (any_rotate:SWI48 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (can_create_pseudo_p ()) + operands [2] = force_reg (SImode, operands[2]); + + operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); +} + [(set_attr "type" "rotate") + (set_attr "mode" "<MODE>")]) + ;; Implement rotation using two double-precision ;; shift instructions and a scratch register. 
@@ -10686,7 +10206,7 @@ { operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); - split_<dwi> (&operands[0], 1, &operands[4], &operands[5]); + split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]); }) (define_insn_and_split "ix86_rotr<dwi>3_doubleword" @@ -10714,7 +10234,7 @@ { operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); - split_<dwi> (&operands[0], 1, &operands[4], &operands[5]); + split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]); }) (define_insn "*<rotate_insn><mode>3_1" @@ -10798,8 +10318,7 @@ && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))" [(parallel [(set (strict_low_part (match_dup 0)) (bswap:HI (match_dup 0))) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) ;; Bit set / bit test instructions @@ -10844,6 +10363,8 @@ (match_operand 3 "register_operand" ""))] "" { + rtx (*gen_mov_insv_1) (rtx, rtx); + /* Handle insertions to %ah et al. */ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) FAIL; @@ -10853,11 +10374,10 @@ if (! ext_register_operand (operands[0], VOIDmode)) FAIL; - if (TARGET_64BIT) - emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); - else - emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); - + gen_mov_insv_1 = (TARGET_64BIT + ? 
gen_movdi_insv_1 : gen_movsi_insv_1); + + emit_insn (gen_mov_insv_1 (operands[0], operands[3])); DONE; }) @@ -11107,9 +10627,7 @@ (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] -{ - PUT_MODE (operands[1], QImode); -}) + "PUT_MODE (operands[1], QImode);") (define_split [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) @@ -11118,9 +10636,7 @@ (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] -{ - PUT_MODE (operands[1], QImode); -}) + "PUT_MODE (operands[1], QImode);") (define_split [(set (match_operand:QI 0 "nonimmediate_operand" "") @@ -11249,9 +10765,7 @@ (if_then_else (match_dup 0) (label_ref (match_dup 1)) (pc)))] -{ - PUT_MODE (operands[0], VOIDmode); -}) + "PUT_MODE (operands[0], VOIDmode);") (define_split [(set (pc) @@ -11420,7 +10934,7 @@ ;; Define combination compare-and-branch fp compare instructions to help ;; combine. -(define_insn "*fp_jcc_3_387" +(define_insn "*fp_jcc_1_387" [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" [(match_operand 1 "register_operand" "f") @@ -11438,7 +10952,7 @@ && !TARGET_CMOVE" "#") -(define_insn "*fp_jcc_4_387" +(define_insn "*fp_jcc_1r_387" [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" [(match_operand 1 "register_operand" "f") @@ -11456,7 +10970,7 @@ && !TARGET_CMOVE" "#") -(define_insn "*fp_jcc_5_387" +(define_insn "*fp_jcc_2_387" [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" [(match_operand 1 "register_operand" "f") @@ -11471,7 +10985,7 @@ && !TARGET_CMOVE" "#") -(define_insn "*fp_jcc_6_387" +(define_insn "*fp_jcc_2r_387" [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" [(match_operand 1 "register_operand" "f") @@ -11486,7 +11000,7 @@ && !TARGET_CMOVE" "#") -(define_insn "*fp_jcc_7_387" +(define_insn "*fp_jcc_3_387" [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" [(match_operand 1 "register_operand" "f") @@ -11503,29 +11017,6 @@ && !TARGET_CMOVE" "#") -;; The 
order of operands in *fp_jcc_8_387 is forced by combine in -;; simplify_comparison () function. Float operator is treated as RTX_OBJ -;; with a precedence over other operators and is always put in the first -;; place. Swap condition and operands to match ficom instruction. - -(define_insn "*fp_jcc_8<mode>_387" - [(set (pc) - (if_then_else (match_operator 0 "ix86_fp_comparison_operator" - [(match_operator 1 "float_operator" - [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) - (match_operand 3 "register_operand" "f,f")]) - (label_ref (match_operand 4 "" "")) - (pc))) - (clobber (reg:CCFP FPSR_REG)) - (clobber (reg:CCFP FLAGS_REG)) - (clobber (match_scratch:HI 5 "=a,a"))] - "X87_FLOAT_MODE_P (GET_MODE (operands[3])) - && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun)) - && GET_MODE (operands[1]) == GET_MODE (operands[3]) - && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode - && !TARGET_CMOVE" - "#") - (define_split [(set (pc) (if_then_else (match_operator 0 "ix86_fp_comparison_operator" @@ -11561,12 +11052,37 @@ DONE; }) +;; The order of operands in *fp_jcc_4_387 is forced by combine in +;; simplify_comparison () function. Float operator is treated as RTX_OBJ +;; with a precedence over other operators and is always put in the first +;; place. Swap condition and operands to match ficom instruction. 
+ +(define_insn "*fp_jcc_4_<mode>_387" + [(set (pc) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) + (match_operand 3 "register_operand" "f,f")]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a,a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[3])) + && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode + && !TARGET_CMOVE" + "#") + (define_split [(set (pc) - (if_then_else (match_operator 0 "ix86_fp_comparison_operator" - [(match_operator 1 "float_operator" - [(match_operand:X87MODEI12 2 "memory_operand" "")]) - (match_operand 3 "register_operand" "")]) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "")]) + (match_operand 3 "register_operand" "")]) (match_operand 4 "" "") (match_operand 5 "" ""))) (clobber (reg:CCFP FPSR_REG)) @@ -11586,10 +11102,11 @@ ;; %%% Kill this when reload knows how to do it. 
(define_split [(set (pc) - (if_then_else (match_operator 0 "ix86_fp_comparison_operator" - [(match_operator 1 "float_operator" - [(match_operand:X87MODEI12 2 "register_operand" "")]) - (match_operand 3 "register_operand" "")]) + (if_then_else + (match_operator 0 "ix86_swapped_fp_comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "register_operand" "")]) + (match_operand 3 "register_operand" "")]) (match_operand 4 "" "") (match_operand 5 "" ""))) (clobber (reg:CCFP FPSR_REG)) @@ -11750,6 +11267,22 @@ DONE; }) +(define_insn_and_split "*call_pop_0_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_pop_0" [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) (match_operand:SI 1 "" "")) @@ -11765,6 +11298,22 @@ } [(set_attr "type" "call")]) +(define_insn_and_split "*call_pop_1_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_pop_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand:SI 1 "" "")) @@ -11779,6 +11328,22 @@ } [(set_attr "type" "call")]) 
+(define_insn_and_split "*sibcall_pop_1_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i")))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*sibcall_pop_1" [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) (match_operand:SI 1 "" "")) @@ -11811,36 +11376,74 @@ DONE; }) +(define_insn_and_split "*call_0_vzeroupper" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_0" [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) (match_operand 1 "" ""))] "" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; -} + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*call_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand 1 "" ""))] "!TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if 
(constant_call_address_operand (operands[0], Pmode)) - return "call\t%P0"; - return "call\t%A0"; -} + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*sibcall_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*sibcall_1" [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) (match_operand 1 "" ""))] "!TARGET_64BIT && SIBLING_CALL_P (insn)" - "@ - jmp\t%P0 - jmp\t%A0" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*call_1_rex64" @@ -11848,11 +11451,33 @@ (match_operand 1 "" ""))] "TARGET_64BIT && !SIBLING_CALL_P (insn) && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" -{ - if (constant_call_address_operand (operands[0], Pmode)) - return "call\t%P0"; - return "call\t%A0"; -} + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber 
(reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*call_1_rex64_ms_sysv" @@ -11872,27 +11497,45 @@ (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] "TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if (constant_call_address_operand (operands[0], Pmode)) - return "call\t%P0"; - return "call\t%A0"; -} + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*call_1_rex64_large_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*call_1_rex64_large" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) (match_operand 1 "" ""))] "TARGET_64BIT && !SIBLING_CALL_P (insn)" - "call\t%A0" + { return ix86_output_call_insn (insn, operands[0], 0); } + [(set_attr "type" "call")]) + +(define_insn_and_split "*sibcall_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P 
(insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" [(set_attr "type" "call")]) (define_insn "*sibcall_1_rex64" [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) (match_operand 1 "" ""))] "TARGET_64BIT && SIBLING_CALL_P (insn)" - "@ - jmp\t%P0 - jmp\t%A0" + { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) ;; Call subroutine, returning value in operand 0 @@ -12083,13 +11726,22 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) -(define_insn "vswapmov" - [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "register_operand" "r")) - (unspec_volatile [(const_int 0)] UNSPECV_VSWAPMOV)] - "" - "movl.s\t{%1, %0|%0, %1}" - [(set_attr "length" "2") +;; Generate nops. Operand 0 is the number of nops, up to 8. +(define_insn "nops" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] + UNSPECV_NOPS)] + "reload_completed" +{ + int num = INTVAL (operands[0]); + + gcc_assert (num >= 1 && num <= 8); + + while (num--) + fputs ("\tnop\n", asm_out_file); + + return ""; +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) @@ -12123,7 +11775,7 @@ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - { return output_set_got (operands[0], NULL_RTX); } + "* return output_set_got (operands[0], NULL_RTX);" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -12133,7 +11785,7 @@ UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - { return output_set_got (operands[0], operands[1]); } + "* return output_set_got (operands[0], operands[1]);" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -12220,6 +11872,64 @@ "leave" [(set_attr "type" "leave")]) +;; Handle -fsplit-stack. 
+ +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + ix86_expand_split_stack_prologue (); + DONE; +}) + +;; In order to support the call/return predictor, we use a return +;; instruction which the middle-end doesn't see. +(define_insn "split_stack_return" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] + UNSPECV_SPLIT_STACK_RETURN)] + "" +{ + if (operands[0] == const0_rtx) + return "ret"; + else + return "ret\t%0"; +} + [(set_attr "atom_unit" "jeu") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 1) + (const_int 3))) + (set (attr "length_immediate") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 0) + (const_int 2)))]) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. + +(define_expand "split_stack_space_check" + [(set (pc) (if_then_else + (ltu (minus (reg SP_REG) + (match_operand 0 "register_operand" "")) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" +{ + rtx reg, size, limit; + + reg = gen_reg_rtx (Pmode); + size = force_reg (Pmode, operands[0]); + emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size)); + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit)); + ix86_expand_branch (GEU, reg, limit, operands[1]); + + DONE; +}) + ;; Bit manipulation instructions. 
(define_expand "ffs<mode>2" @@ -12283,13 +11993,19 @@ (set_attr "mode" "<MODE>")]) (define_insn "ctz<mode>2" - [(set (match_operand:SWI48 0 "register_operand" "=r") - (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "" - "bsf{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; +} [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") + (set (attr "prefix_rep") (symbol_ref "TARGET_BMI")) (set_attr "mode" "<MODE>")]) (define_expand "clz<mode>2" @@ -12316,12 +12032,212 @@ [(set (match_operand:SWI248 0 "register_operand" "=r") (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_ABM" + "TARGET_ABM || TARGET_BMI" "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +;; BMI instructions. 
+(define_insn "*bmi_andn_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (not:SWI48 + (match_operand:SWI48 1 "register_operand" "r")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "andn\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "bmi_bextr_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")] + UNSPEC_BEXTR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "bextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bmi_blsi_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (neg:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsi\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bmi_blsmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bmi_blsr_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; TBM instructions. 
+(define_insn "tbm_bextri_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "const_0_to_255_operand" "n") + (match_operand:SWI48 3 "const_0_to_255_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3])); + return "bextr\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blcfill_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blci_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (not:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1))) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blci\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blcic_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blcmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blcs_<mode>" + [(set (match_operand:SWI48 0 
"register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcs\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blsfill_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_blsic_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_t1mskc_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "t1mskc\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*tbm_tzmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "tzmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) @@ -12521,7 +12437,8 @@ (define_insn_and_split "paritydi2_cmp" [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:DI 3 "register_operand" "0"))) + (unspec:CC [(match_operand:DI 3 
"register_operand" "0")] + UNSPEC_PARITY)) (clobber (match_scratch:DI 0 "=r")) (clobber (match_scratch:SI 1 "=&r")) (clobber (match_scratch:HI 2 "=Q"))] @@ -12534,7 +12451,7 @@ (clobber (reg:CC FLAGS_REG))]) (parallel [(set (reg:CC FLAGS_REG) - (parity:CC (match_dup 1))) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) (clobber (match_dup 1)) (clobber (match_dup 2))])] { @@ -12551,7 +12468,8 @@ (define_insn_and_split "paritysi2_cmp" [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:SI 2 "register_operand" "0"))) + (unspec:CC [(match_operand:SI 2 "register_operand" "0")] + UNSPEC_PARITY)) (clobber (match_scratch:SI 0 "=r")) (clobber (match_scratch:HI 1 "=&Q"))] "! TARGET_POPCNT" @@ -12563,7 +12481,7 @@ (clobber (reg:CC FLAGS_REG))]) (parallel [(set (reg:CC FLAGS_REG) - (parity:CC (match_dup 1))) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) (clobber (match_dup 1))])] { operands[3] = gen_lowpart (HImode, operands[2]); @@ -12574,20 +12492,13 @@ (define_insn "*parityhi2_cmp" [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:HI 1 "register_operand" "0"))) + (unspec:CC [(match_operand:HI 1 "register_operand" "0")] + UNSPEC_PARITY)) (clobber (match_scratch:HI 0 "=Q"))] "! TARGET_POPCNT" "xor{b}\t{%h0, %b0|%b0, %h0}" [(set_attr "length" "2") (set_attr "mode" "HI")]) - -(define_insn "*parityqi2_cmp" - [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:QI 0 "register_operand" "q")))] - "! TARGET_POPCNT" - "test{b}\t%0, %0" - [(set_attr "length" "2") - (set_attr "mode" "QI")]) ;; Thread-local storage patterns for ELF. ;; @@ -12748,58 +12659,47 @@ UNSPEC_TLS_GD)) (clobber (match_dup 4)) (clobber (match_dup 5)) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) + +;; Segment register for the thread base ptr load +(define_mode_attr tp_seg [(SI "gs") (DI "fs")]) ;; Load and add the thread base pointer from %gs:0. 
- -(define_insn "*load_tp_si" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_TP))] - "!TARGET_64BIT" - "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" +(define_insn "*load_tp_<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(const_int 0)] UNSPEC_TP))] + "" + "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") (set_attr "memory" "load") (set_attr "imm_disp" "false")]) -(define_insn "*add_tp_si" - [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) - (match_operand:SI 1 "register_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT" - "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" +(define_insn "*add_tp_<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (unspec:P [(const_int 0)] UNSPEC_TP) + (match_operand:P 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "" + "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" [(set_attr "type" "alu") (set_attr "modrm" "0") (set_attr "length" "7") (set_attr "memory" "load") (set_attr "imm_disp" "false")]) -(define_insn "*load_tp_di" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(const_int 0)] UNSPEC_TP))] - "TARGET_64BIT" - "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) - -(define_insn "*add_tp_di" - [(set (match_operand:DI 0 "register_operand" "=r") - (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) - (match_operand:DI 1 "register_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" - [(set_attr "type" "alu") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) +;; The Sun linker took the AMD64 TLS spec 
literally and can only handle +;; %rax as destination of the initial executable code sequence. +(define_insn "tls_initial_exec_64_sun" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec:DI + [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_IE_SUN)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_SUN_TLS" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}\n\tadd{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}" + [(set_attr "type" "multi")]) ;; GNU2 TLS patterns can be split. @@ -12928,8 +12828,6 @@ operands[4] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); }) - -;; ;; These patterns match the binary 387 instructions for addM3, subM3, ;; mulM3 and divM3. There are three patterns for each of DFmode and @@ -13426,7 +13324,8 @@ [(set_attr "type" "fpspc") (set_attr "mode" "XF") (set_attr "athlon_decode" "direct") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) (define_insn "sqrt_extend<mode>xf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") @@ -13438,7 +13337,8 @@ [(set_attr "type" "fpspc") (set_attr "mode" "XF") (set_attr "athlon_decode" "direct") - (set_attr "amdfam10_decode" "direct")]) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) (define_insn "*rsqrtsf2_sse" [(set (match_operand:SF 0 "register_operand" "=x") @@ -13472,7 +13372,8 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "*") - (set_attr "amdfam10_decode" "*")]) + (set_attr "amdfam10_decode" "*") + (set_attr "bdver1_decode" "*")]) (define_expand "sqrt<mode>2" [(set (match_operand:MODEF 0 "register_operand" "") @@ -13546,6 +13447,8 @@ (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { + rtx (*gen_truncxf) (rtx, rtx); + rtx label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); @@ -13562,10 +13465,11 @@ /* Truncate 
the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) - emit_insn (gen_truncxf<mode>2 (operands[0], op1)); - else - emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1)); - + gen_truncxf = gen_truncxf<mode>2; + else + gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec; + + emit_insn (gen_truncxf (operands[0], op1)); DONE; }) @@ -13614,6 +13518,8 @@ (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { + rtx (*gen_truncxf) (rtx, rtx); + rtx label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); @@ -13631,10 +13537,11 @@ /* Truncate the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) - emit_insn (gen_truncxf<mode>2 (operands[0], op1)); - else - emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1)); - + gen_truncxf = gen_truncxf<mode>2; + else + gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec; + + emit_insn (gen_truncxf (operands[0], op1)); DONE; }) @@ -13708,8 +13615,7 @@ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) && !(reload_completed || reload_in_progress)" - [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))] - "") + [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]) (define_split [(set (match_operand:XF 0 "register_operand" "") @@ -13719,8 +13625,7 @@ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) && !(reload_completed || reload_in_progress)" - [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] - "") + [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]) (define_insn "sincos_extend<mode>xf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") @@ -13746,8 +13651,8 @@ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) && !(reload_completed || 
reload_in_progress)" - [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))] - "") + [(set (match_dup 1) + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]) (define_split [(set (match_operand:XF 0 "register_operand" "") @@ -13758,8 +13663,8 @@ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) && !(reload_completed || reload_in_progress)" - [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))] - "") + [(set (match_dup 0) + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]) (define_expand "sincos<mode>3" [(use (match_operand:MODEF 0 "register_operand" "")) @@ -13875,8 +13780,7 @@ UNSPEC_FPATAN)) (clobber (match_scratch:XF 3 ""))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "") + && flag_unsafe_math_optimizations") (define_expand "atan2<mode>3" [(use (match_operand:MODEF 0 "register_operand" "")) @@ -14227,9 +14131,7 @@ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (XFmode); -}) + "operands[2] = gen_reg_rtx (XFmode);") (define_expand "logb<mode>2" [(use (match_operand:MODEF 0 "register_operand" "")) @@ -14613,9 +14515,7 @@ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (XFmode); -}) + "operands[2] = gen_reg_rtx (XFmode);") (define_expand "significand<mode>2" [(use (match_operand:MODEF 0 "register_operand" "")) @@ -14758,8 +14658,7 @@ "reload_completed" [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) (clobber (match_dup 3))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -14769,8 +14668,7 @@ (clobber (match_scratch 3 ""))] "reload_completed" [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 
1)] UNSPEC_FIST)) - (clobber (match_dup 3))])] - "") + (clobber (match_dup 3))])]) (define_insn_and_split "*fist<mode>2_1" [(set (match_operand:X87MODEI12 0 "register_operand" "") @@ -14816,8 +14714,7 @@ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] "reload_completed" [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_split [(set (match_operand:X87MODEI12 0 "memory_operand" "") @@ -14825,23 +14722,20 @@ UNSPEC_FIST)) (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] "reload_completed" - [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))] - "") + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]) (define_expand "lrintxf<mode>2" [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST))] - "TARGET_USE_FANCY_MATH_387" - "") + "TARGET_USE_FANCY_MATH_387") (define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2" [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "") (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")] UNSPEC_FIX_NOTRUNC))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH - && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)" - "") + && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)") (define_expand "lround<MODEF:mode><SSEMODEI24:mode>2" [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "") @@ -15022,8 +14916,7 @@ (use (match_dup 2)) (use (match_dup 3)) (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -15037,8 +14930,7 @@ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) (use (match_dup 2)) (use (match_dup 3)) - (clobber (match_dup 5))])] - "") + (clobber (match_dup 5))])]) (define_insn "fist<mode>2_floor" [(set 
(match_operand:X87MODEI12 0 "memory_operand" "=m") @@ -15079,8 +14971,7 @@ UNSPEC_FIST_FLOOR)) (use (match_dup 2)) (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:X87MODEI12 0 "memory_operand" "") @@ -15093,8 +14984,7 @@ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST_FLOOR)) (use (match_dup 2)) - (use (match_dup 3))])] - "") + (use (match_dup 3))])]) (define_expand "lfloorxf<mode>2" [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") @@ -15103,8 +14993,7 @@ (clobber (reg:CC FLAGS_REG))])] "TARGET_USE_FANCY_MATH_387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "") + && flag_unsafe_math_optimizations") (define_expand "lfloor<MODEF:mode><SWI48:mode>2" [(match_operand:SWI48 0 "nonimmediate_operand" "") @@ -15284,8 +15173,7 @@ (use (match_dup 2)) (use (match_dup 3)) (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -15299,8 +15187,7 @@ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) (use (match_dup 2)) (use (match_dup 3)) - (clobber (match_dup 5))])] - "") + (clobber (match_dup 5))])]) (define_insn "fist<mode>2_ceil" [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") @@ -15341,8 +15228,7 @@ UNSPEC_FIST_CEIL)) (use (match_dup 2)) (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))] - "") + (set (match_dup 0) (match_dup 4))]) (define_split [(set (match_operand:X87MODEI12 0 "memory_operand" "") @@ -15355,8 +15241,7 @@ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST_CEIL)) (use (match_dup 2)) - (use (match_dup 3))])] - "") + (use (match_dup 3))])]) (define_expand "lceilxf<mode>2" [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") @@ -15365,8 +15250,7 @@ (clobber (reg:CC FLAGS_REG))])] 
"TARGET_USE_FANCY_MATH_387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "") + && flag_unsafe_math_optimizations") (define_expand "lceil<MODEF:mode><SWI48:mode>2" [(match_operand:SWI48 0 "nonimmediate_operand" "") @@ -15516,7 +15400,6 @@ && flag_unsafe_math_optimizations" { emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1])); - DONE; }) @@ -15636,18 +15519,65 @@ DONE; }) -(define_expand "signbit<mode>2" +(define_expand "signbitxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + DONE; +}) + +(define_insn "movmsk_df" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH" + "%vmovmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "DF")]) + +;; Use movmskpd in SSE mode to avoid store forwarding stall +;; for 32bit targets and movq+shrq sequence for 64bit targets. 
+(define_expand "signbitdf2" [(use (match_operand:SI 0 "register_operand" "")) - (use (match_operand:X87MODEF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" -{ - rtx mask = GEN_INT (0x0200); - + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" +{ + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH) + { + emit_insn (gen_movmsk_df (operands[0], operands[1])); + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + } + else + { + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamdf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + } + DONE; +}) + +(define_expand "signbitsf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)" +{ rtx scratch = gen_reg_rtx (HImode); - emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1])); - emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + emit_insn (gen_fxamsf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); DONE; }) @@ -15661,11 +15591,11 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) -(define_expand "movmemsi" +(define_expand "movmem<mode>" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:SI 2 "nonmemory_operand" "")) - (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SWI48 2 "nonmemory_operand" "")) + (use (match_operand:SWI48 3 "const_int_operand" "")) (use (match_operand:SI 4 "const_int_operand" "")) (use (match_operand:SI 5 "const_int_operand" ""))] "" @@ -15677,22 +15607,6 @@ FAIL; }) -(define_expand "movmemdi" - [(use (match_operand:BLK 0 
"memory_operand" "")) - (use (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:DI 2 "nonmemory_operand" "")) - (use (match_operand:DI 3 "const_int_operand" "")) - (use (match_operand:SI 4 "const_int_operand" "")) - (use (match_operand:SI 5 "const_int_operand" ""))] - "TARGET_64BIT" -{ - if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], - operands[4], operands[5])) - DONE; - else - FAIL; -}) - ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. @@ -15747,98 +15661,57 @@ "TARGET_64BIT" "movsq" [(set_attr "type" "str") - (set_attr "mode" "DI") - (set_attr "memory" "both")]) + (set_attr "memory" "both") + (set_attr "mode" "DI")]) (define_insn "*strmovsi_1" - [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) - (mem:SI (match_operand:SI 3 "register_operand" "1"))) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 2) - (const_int 4))) - (set (match_operand:SI 1 "register_operand" "=S") - (plus:SI (match_dup 3) - (const_int 4)))] - "!TARGET_64BIT" + [(set (mem:SI (match_operand:P 2 "register_operand" "0")) + (mem:SI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 4))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 4)))] + "" "movs{l|d}" [(set_attr "type" "str") - (set_attr "mode" "SI") - (set_attr "memory" "both")]) - -(define_insn "*strmovsi_rex_1" - [(set (mem:SI (match_operand:DI 2 "register_operand" "0")) - (mem:SI (match_operand:DI 3 "register_operand" "1"))) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 2) - (const_int 4))) - (set (match_operand:DI 1 "register_operand" "=S") - (plus:DI (match_dup 3) - (const_int 4)))] - "TARGET_64BIT" - "movs{l|d}" - [(set_attr "type" "str") - (set_attr "mode" "SI") - (set_attr "memory" "both")]) + (set_attr "memory" "both") + (set_attr "mode" "SI")]) 
(define_insn "*strmovhi_1" - [(set (mem:HI (match_operand:SI 2 "register_operand" "0")) - (mem:HI (match_operand:SI 3 "register_operand" "1"))) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 2) - (const_int 2))) - (set (match_operand:SI 1 "register_operand" "=S") - (plus:SI (match_dup 3) - (const_int 2)))] - "!TARGET_64BIT" - "movsw" - [(set_attr "type" "str") - (set_attr "memory" "both") - (set_attr "mode" "HI")]) - -(define_insn "*strmovhi_rex_1" - [(set (mem:HI (match_operand:DI 2 "register_operand" "0")) - (mem:HI (match_operand:DI 3 "register_operand" "1"))) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 2) - (const_int 2))) - (set (match_operand:DI 1 "register_operand" "=S") - (plus:DI (match_dup 3) - (const_int 2)))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:P 2 "register_operand" "0")) + (mem:HI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 2))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 2)))] + "" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) (define_insn "*strmovqi_1" - [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) - (mem:QI (match_operand:SI 3 "register_operand" "1"))) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 2) - (const_int 1))) - (set (match_operand:SI 1 "register_operand" "=S") - (plus:SI (match_dup 3) - (const_int 1)))] - "!TARGET_64BIT" + [(set (mem:QI (match_operand:P 2 "register_operand" "0")) + (mem:QI (match_operand:P 3 "register_operand" "1"))) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 2) + (const_int 1))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_dup 3) + (const_int 1)))] + "" "movsb" [(set_attr "type" "str") (set_attr "memory" "both") - (set_attr "mode" "QI")]) - -(define_insn "*strmovqi_rex_1" - [(set (mem:QI 
(match_operand:DI 2 "register_operand" "0")) - (mem:QI (match_operand:DI 3 "register_operand" "1"))) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 2) - (const_int 1))) - (set (match_operand:DI 1 "register_operand" "=S") - (plus:DI (match_dup 3) - (const_int 1)))] - "TARGET_64BIT" - "movsb" - [(set_attr "type" "str") - (set_attr "memory" "both") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "mode" "QI")]) (define_expand "rep_mov" @@ -15873,37 +15746,18 @@ (set_attr "mode" "DI")]) (define_insn "*rep_movsi" - [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") - (const_int 2)) - (match_operand:SI 3 "register_operand" "0"))) - (set (match_operand:SI 1 "register_operand" "=S") - (plus:SI (ashift:SI (match_dup 5) (const_int 2)) - (match_operand:SI 4 "register_operand" "1"))) + [(set (match_operand:P 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 5 "register_operand" "2") + (const_int 2)) + (match_operand:P 3 "register_operand" "0"))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (ashift:P (match_dup 5) (const_int 2)) + (match_operand:P 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] - "!TARGET_64BIT" - "rep{%;} movs{l|d}" - [(set_attr "type" "str") - (set_attr "prefix_rep" "1") - (set_attr "memory" "both") - (set_attr "mode" "SI")]) - -(define_insn "*rep_movsi_rex64" - [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") - (const_int 2)) - (match_operand:DI 3 "register_operand" "0"))) - (set 
(match_operand:DI 1 "register_operand" "=S") - (plus:DI (ashift:DI (match_dup 5) (const_int 2)) - (match_operand:DI 4 "register_operand" "1"))) - (set (mem:BLK (match_dup 3)) - (mem:BLK (match_dup 4))) - (use (match_dup 5))] - "TARGET_64BIT" + "" "rep{%;} movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15911,42 +15765,25 @@ (set_attr "mode" "SI")]) (define_insn "*rep_movqi" - [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_operand:SI 3 "register_operand" "0") - (match_operand:SI 5 "register_operand" "2"))) - (set (match_operand:SI 1 "register_operand" "=S") - (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5))) + [(set (match_operand:P 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_operand:P 3 "register_operand" "0") + (match_operand:P 5 "register_operand" "2"))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] - "!TARGET_64BIT" + "" "rep{%;} movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") - (set_attr "mode" "SI")]) - -(define_insn "*rep_movqi_rex64" - [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_operand:DI 3 "register_operand" "0") - (match_operand:DI 5 "register_operand" "2"))) - (set (match_operand:DI 1 "register_operand" "=S") - (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5))) - (set (mem:BLK (match_dup 3)) - (mem:BLK (match_dup 4))) - (use (match_dup 5))] - "TARGET_64BIT" - "rep{%;} movsb" - [(set_attr "type" "str") - (set_attr "prefix_rep" "1") - (set_attr "memory" "both") - (set_attr "mode" "SI")]) - -(define_expand "setmemsi" + (set_attr "mode" "QI")]) + +(define_expand 
"setmem<mode>" [(use (match_operand:BLK 0 "memory_operand" "")) - (use (match_operand:SI 1 "nonmemory_operand" "")) + (use (match_operand:SWI48 1 "nonmemory_operand" "")) (use (match_operand 2 "const_int_operand" "")) (use (match_operand 3 "const_int_operand" "")) (use (match_operand:SI 4 "const_int_operand" "")) @@ -15961,23 +15798,6 @@ FAIL; }) -(define_expand "setmemdi" - [(use (match_operand:BLK 0 "memory_operand" "")) - (use (match_operand:DI 1 "nonmemory_operand" "")) - (use (match_operand 2 "const_int_operand" "")) - (use (match_operand 3 "const_int_operand" "")) - (use (match_operand 4 "const_int_operand" "")) - (use (match_operand 5 "const_int_operand" ""))] - "TARGET_64BIT" -{ - if (ix86_expand_setmem (operands[0], operands[1], - operands[2], operands[3], - operands[4], operands[5])) - DONE; - else - FAIL; -}) - ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. @@ -16026,76 +15846,44 @@ (set_attr "mode" "DI")]) (define_insn "*strsetsi_1" - [(set (mem:SI (match_operand:SI 1 "register_operand" "0")) + [(set (mem:SI (match_operand:P 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 1) - (const_int 4)))] - "!TARGET_64BIT" - "stos{l|d}" - [(set_attr "type" "str") - (set_attr "memory" "store") - (set_attr "mode" "SI")]) - -(define_insn "*strsetsi_rex_1" - [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) - (match_operand:SI 2 "register_operand" "a")) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 1) - (const_int 4)))] - "TARGET_64BIT" + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 4)))] + "" "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*strsethi_1" - [(set (mem:HI (match_operand:SI 1 "register_operand" "0")) + [(set (mem:HI (match_operand:P 1 "register_operand" "0")) 
(match_operand:HI 2 "register_operand" "a")) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 1) - (const_int 2)))] - "!TARGET_64BIT" - "stosw" - [(set_attr "type" "str") - (set_attr "memory" "store") - (set_attr "mode" "HI")]) - -(define_insn "*strsethi_rex_1" - [(set (mem:HI (match_operand:DI 1 "register_operand" "0")) - (match_operand:HI 2 "register_operand" "a")) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 1) - (const_int 2)))] - "TARGET_64BIT" + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 2)))] + "" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*strsetqi_1" - [(set (mem:QI (match_operand:SI 1 "register_operand" "0")) + [(set (mem:QI (match_operand:P 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "a")) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_dup 1) - (const_int 1)))] - "!TARGET_64BIT" + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_dup 1) + (const_int 1)))] + "" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") - (set_attr "mode" "QI")]) - -(define_insn "*strsetqi_rex_1" - [(set (mem:QI (match_operand:DI 1 "register_operand" "0")) - (match_operand:QI 2 "register_operand" "a")) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (match_dup 1) - (const_int 1)))] - "TARGET_64BIT" - "stosb" - [(set_attr "type" "str") - (set_attr "memory" "store") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "mode" "QI")]) (define_expand "rep_stos" @@ -16126,33 +15914,16 @@ (set_attr "mode" "DI")]) (define_insn "*rep_stossi" - [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") 
- (const_int 2)) - (match_operand:SI 3 "register_operand" "0"))) + [(set (match_operand:P 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 4 "register_operand" "1") + (const_int 2)) + (match_operand:P 3 "register_operand" "0"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] - "!TARGET_64BIT" - "rep{%;} stos{l|d}" - [(set_attr "type" "str") - (set_attr "prefix_rep" "1") - (set_attr "memory" "store") - (set_attr "mode" "SI")]) - -(define_insn "*rep_stossi_rex64" - [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") - (const_int 2)) - (match_operand:DI 3 "register_operand" "0"))) - (set (mem:BLK (match_dup 3)) - (const_int 0)) - (use (match_operand:SI 2 "register_operand" "a")) - (use (match_dup 4))] - "TARGET_64BIT" + "" "rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -16160,36 +15931,24 @@ (set_attr "mode" "SI")]) (define_insn "*rep_stosqi" - [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:SI 0 "register_operand" "=D") - (plus:SI (match_operand:SI 3 "register_operand" "0") - (match_operand:SI 4 "register_operand" "1"))) + [(set (match_operand:P 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (match_operand:P 3 "register_operand" "0") + (match_operand:P 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] - "!TARGET_64BIT" + "" "rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") - (set_attr "mode" "QI")]) - -(define_insn "*rep_stosqi_rex64" - [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 
"register_operand" "=D") - (plus:DI (match_operand:DI 3 "register_operand" "0") - (match_operand:DI 4 "register_operand" "1"))) - (set (mem:BLK (match_dup 3)) - (const_int 0)) - (use (match_operand:QI 2 "register_operand" "a")) - (use (match_dup 4))] - "TARGET_64BIT" - "rep{%;} stosb" - [(set_attr "type" "str") - (set_attr "prefix_rep" "1") - (set_attr "memory" "store") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "mode" "QI")]) (define_expand "cmpstrnsi" @@ -16240,13 +15999,12 @@ } else { - rtx (*cmp_insn)(rtx, rtx); - - if (TARGET_64BIT) - cmp_insn = gen_cmpdi_1; - else - cmp_insn = gen_cmpsi_1; - emit_insn (cmp_insn (countreg, countreg)); + rtx (*gen_cmp) (rtx, rtx); + + gen_cmp = (TARGET_64BIT + ? gen_cmpdi_1 : gen_cmpsi_1); + + emit_insn (gen_cmp (countreg, countreg)); emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, operands[1], operands[2])); } @@ -16273,8 +16031,10 @@ (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" - "operands[1] = gen_reg_rtx (QImode); - operands[2] = gen_reg_rtx (QImode);") +{ + operands[1] = gen_reg_rtx (QImode); + operands[2] = gen_reg_rtx (QImode); +}) ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is ;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
@@ -16293,33 +16053,22 @@ (define_insn "*cmpstrnqi_nz_1" [(set (reg:CC FLAGS_REG) - (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) - (mem:BLK (match_operand:SI 5 "register_operand" "1")))) - (use (match_operand:SI 6 "register_operand" "2")) + (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0")) + (mem:BLK (match_operand:P 5 "register_operand" "1")))) + (use (match_operand:P 6 "register_operand" "2")) (use (match_operand:SI 3 "immediate_operand" "i")) - (clobber (match_operand:SI 0 "register_operand" "=S")) - (clobber (match_operand:SI 1 "register_operand" "=D")) - (clobber (match_operand:SI 2 "register_operand" "=c"))] - "!TARGET_64BIT" + (clobber (match_operand:P 0 "register_operand" "=S")) + (clobber (match_operand:P 1 "register_operand" "=D")) + (clobber (match_operand:P 2 "register_operand" "=c"))] + "" "repz{%;} cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") - (set_attr "prefix_rep" "1")]) - -(define_insn "*cmpstrnqi_nz_rex_1" - [(set (reg:CC FLAGS_REG) - (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) - (mem:BLK (match_operand:DI 5 "register_operand" "1")))) - (use (match_operand:DI 6 "register_operand" "2")) - (use (match_operand:SI 3 "immediate_operand" "i")) - (clobber (match_operand:DI 0 "register_operand" "=S")) - (clobber (match_operand:DI 1 "register_operand" "=D")) - (clobber (match_operand:DI 2 "register_operand" "=c"))] - "TARGET_64BIT" - "repz{%;} cmpsb" - [(set_attr "type" "str") - (set_attr "mode" "QI") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "prefix_rep" "1")]) ;; The same, but the count is not known to not be zero. 
@@ -16341,59 +16090,33 @@ (define_insn "*cmpstrnqi_1" [(set (reg:CC FLAGS_REG) - (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") + (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2") (const_int 0)) - (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) - (mem:BLK (match_operand:SI 5 "register_operand" "1"))) - (const_int 0))) - (use (match_operand:SI 3 "immediate_operand" "i")) - (use (reg:CC FLAGS_REG)) - (clobber (match_operand:SI 0 "register_operand" "=S")) - (clobber (match_operand:SI 1 "register_operand" "=D")) - (clobber (match_operand:SI 2 "register_operand" "=c"))] - "!TARGET_64BIT" - "repz{%;} cmpsb" - [(set_attr "type" "str") - (set_attr "mode" "QI") - (set_attr "prefix_rep" "1")]) - -(define_insn "*cmpstrnqi_rex_1" - [(set (reg:CC FLAGS_REG) - (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") - (const_int 0)) - (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) - (mem:BLK (match_operand:DI 5 "register_operand" "1"))) + (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0")) + (mem:BLK (match_operand:P 5 "register_operand" "1"))) (const_int 0))) (use (match_operand:SI 3 "immediate_operand" "i")) (use (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 0 "register_operand" "=S")) - (clobber (match_operand:DI 1 "register_operand" "=D")) - (clobber (match_operand:DI 2 "register_operand" "=c"))] - "TARGET_64BIT" + (clobber (match_operand:P 0 "register_operand" "=S")) + (clobber (match_operand:P 1 "register_operand" "=D")) + (clobber (match_operand:P 2 "register_operand" "=c"))] + "" "repz{%;} cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "prefix_rep" "1")]) -(define_expand "strlensi" - [(set (match_operand:SI 0 "register_operand" "") - (unspec:SI [(match_operand:BLK 1 "general_operand" "") - 
(match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] - "" -{ - if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) - DONE; - else - FAIL; -}) - -(define_expand "strlendi" - [(set (match_operand:DI 0 "register_operand" "") - (unspec:DI [(match_operand:BLK 1 "general_operand" "") - (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] +(define_expand "strlen<mode>" + [(set (match_operand:SWI48x 0 "register_operand" "") + (unspec:SWI48x [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] + UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -16403,39 +16126,30 @@ }) (define_expand "strlenqi_1" - [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" "")) + [(parallel [(set (match_operand 0 "register_operand" "") + (match_operand 2 "" "")) (clobber (match_operand 1 "register_operand" "")) (clobber (reg:CC FLAGS_REG))])] "" "ix86_current_function_needs_cld = 1;") (define_insn "*strlenqi_1" - [(set (match_operand:SI 0 "register_operand" "=&c") - (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) - (match_operand:QI 2 "register_operand" "a") - (match_operand:SI 3 "immediate_operand" "i") - (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) - (clobber (match_operand:SI 1 "register_operand" "=D")) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT" + [(set (match_operand:P 0 "register_operand" "=&c") + (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:P 3 "immediate_operand" "i") + (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS)) + (clobber (match_operand:P 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "" "repnz{%;} scasb" [(set_attr "type" "str") (set_attr "mode" "QI") - (set_attr 
"prefix_rep" "1")]) - -(define_insn "*strlenqi_rex_1" - [(set (match_operand:DI 0 "register_operand" "=&c") - (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) - (match_operand:QI 2 "register_operand" "a") - (match_operand:DI 3 "immediate_operand" "i") - (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) - (clobber (match_operand:DI 1 "register_operand" "=D")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "repnz{%;} scasb" - [(set_attr "type" "str") - (set_attr "mode" "QI") - (set_attr "prefix_rex" "0") + (set (attr "prefix_rex") + (if_then_else + (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0)) + (const_string "0") + (const_string "*"))) (set_attr "prefix_rep" "1")]) ;; Peephole optimizations to clean up after cmpstrn*. This should be @@ -16478,8 +16192,7 @@ (use (match_dup 3)) (clobber (match_dup 0)) (clobber (match_dup 1)) - (clobber (match_dup 2))])] - "") + (clobber (match_dup 2))])]) ;; ...and this one handles cmpstrn*_1. (define_peephole2 @@ -16514,16 +16227,13 @@ (use (reg:CC FLAGS_REG)) (clobber (match_dup 0)) (clobber (match_dup 1)) - (clobber (match_dup 2))])] - "") - - + (clobber (match_dup 2))])]) ;; Conditional move instructions. 
(define_expand "mov<mode>cc" [(set (match_operand:SWIM 0 "register_operand" "") - (if_then_else:SWIM (match_operand 1 "comparison_operator" "") + (if_then_else:SWIM (match_operand 1 "ordered_comparison_operator" "") (match_operand:SWIM 2 "general_operand" "") (match_operand:SWIM 3 "general_operand" "")))] "" @@ -16542,9 +16252,7 @@ (const_int 0)]) (const_int -1) (const_int 0))) - (clobber (reg:CC FLAGS_REG))])] - "" - "") + (clobber (reg:CC FLAGS_REG))])]) (define_insn "*x86_mov<mode>cc_0_m1" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -16639,13 +16347,26 @@ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") -(define_insn "*movsfcc_1_387" - [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" +(define_insn "*movxfcc_1" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") - (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] - "TARGET_80387 && TARGET_CMOVE + (match_operand:XF 2 "register_operand" "f,0") + (match_operand:XF 3 "register_operand" "0,f")))] + "TARGET_80387 && TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +(define_insn "*movdfcc_1_rex64" + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ fcmov%F1\t{%2, %0|%0, %2} @@ -16653,7 +16374,7 @@ cmov%O2%C1\t{%2, %0|%0, %2} cmov%O2%c1\t{%3, %0|%0, %3}" [(set_attr "type" "fcmov,fcmov,icmov,icmov") - (set_attr 
"mode" "SF,SF,SI,SI")]) + (set_attr "mode" "DF,DF,DI,DI")]) (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") @@ -16669,23 +16390,7 @@ # #" [(set_attr "type" "fcmov,fcmov,multi,multi") - (set_attr "mode" "DF")]) - -(define_insn "*movdfcc_1_rex64" - [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") - (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] - "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE - && !(MEM_P (operands[2]) && MEM_P (operands[3]))" - "@ - fcmov%F1\t{%2, %0|%0, %2} - fcmov%f1\t{%3, %0|%0, %3} - cmov%O2%C1\t{%2, %0|%0, %2} - cmov%O2%c1\t{%3, %0|%0, %3}" - [(set_attr "type" "fcmov,fcmov,icmov,icmov") - (set_attr "mode" "DF")]) + (set_attr "mode" "DF,DF,DI,DI")]) (define_split [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") @@ -16704,22 +16409,25 @@ (match_dup 7) (match_dup 8)))] { - split_di (&operands[2], 2, &operands[5], &operands[7]); - split_di (&operands[0], 1, &operands[2], &operands[3]); -}) - -(define_insn "*movxfcc_1" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + split_double_mode (DImode, &operands[2], 2, &operands[5], &operands[7]); + split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]); +}) + +(define_insn "*movsfcc_1_387" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:XF 2 "register_operand" "f,0") - (match_operand:XF 3 "register_operand" "0,f")))] - "TARGET_80387 && TARGET_CMOVE" + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ fcmov%F1\t{%2, 
%0|%0, %2} - fcmov%f1\t{%3, %0|%0, %3}" - [(set_attr "type" "fcmov") - (set_attr "mode" "XF")]) + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict ;; the scalar versions to have only XMM registers as operands. @@ -16833,21 +16541,22 @@ ;; The % modifier is not operational anymore in peephole2's, so we have to ;; swap the operands manually in the case of addition and multiplication. "if (COMMUTATIVE_ARITH_P (operands[2])) - operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), - operands[0], operands[1]); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[0], operands[1]); else - operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), - operands[1], operands[0]);") + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[1], operands[0]);") ;; Conditional addition patterns (define_expand "add<mode>cc" [(match_operand:SWI 0 "register_operand" "") - (match_operand 1 "comparison_operator" "") + (match_operand 1 "ordered_comparison_operator" "") (match_operand:SWI 2 "register_operand" "") (match_operand:SWI 3 "const_int_operand" "")] "" "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") - ;; Misc patterns (?) @@ -16860,43 +16569,37 @@ ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] ;; ;; in proper program order. 
-(define_insn "pro_epilogue_adjust_stack_1" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (plus:SI (match_operand:SI 1 "register_operand" "0,r") - (match_operand:SI 2 "immediate_operand" "i,i"))) + +(define_insn "pro_epilogue_adjust_stack_<mode>_add" + [(set (match_operand:P 0 "register_operand" "=r,r") + (plus:P (match_operand:P 1 "register_operand" "0,r") + (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>"))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] - "!TARGET_64BIT" + "" { switch (get_attr_type (insn)) { case TYPE_IMOV: - return "mov{l}\t{%1, %0|%0, %1}"; + return "mov{<imodesuffix>}\t{%1, %0|%0, %1}"; case TYPE_ALU: - if (CONST_INT_P (operands[2]) - && (INTVAL (operands[2]) == 128 - || (INTVAL (operands[2]) < 0 - && INTVAL (operands[2]) != -128))) - { - operands[2] = GEN_INT (-INTVAL (operands[2])); - return "sub{l}\t{%2, %0|%0, %2}"; - } - return "add{l}\t{%2, %0|%0, %2}"; - - case TYPE_LEA: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + + default: operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - return "lea{l}\t{%a2, %0|%0, %a2}"; - - default: - gcc_unreachable (); - } -} - [(set (attr "type") - (cond [(and (eq_attr "alternative" "0") - (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) + return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "alu") - (match_operand:SI 2 "const0_operand" "") + (match_operand:<MODE> 2 "const0_operand" "") (const_string "imov") ] (const_string "lea"))) @@ -16908,113 +16611,33 @@ (const_string "1") ] (const_string "*"))) - (set_attr "mode" "SI")]) - -(define_insn "pro_epilogue_adjust_stack_rex64" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (plus:DI (match_operand:DI 1 
"register_operand" "0,r") - (match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) + (set_attr "mode" "<MODE>")]) + +(define_insn "pro_epilogue_adjust_stack_<mode>_sub" + [(set (match_operand:P 0 "register_operand" "=r") + (minus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r"))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_IMOV: - return "mov{q}\t{%1, %0|%0, %1}"; - - case TYPE_ALU: - if (CONST_INT_P (operands[2]) - /* Avoid overflows. */ - && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) - && (INTVAL (operands[2]) == 128 - || (INTVAL (operands[2]) < 0 - && INTVAL (operands[2]) != -128))) - { - operands[2] = GEN_INT (-INTVAL (operands[2])); - return "sub{q}\t{%2, %0|%0, %2}"; - } - return "add{q}\t{%2, %0|%0, %2}"; - - case TYPE_LEA: - operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - return "lea{q}\t{%a2, %0|%0, %a2}"; - - default: - gcc_unreachable (); - } -} - [(set (attr "type") - (cond [(and (eq_attr "alternative" "0") - (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) - (const_string "alu") - (match_operand:DI 2 "const0_operand" "") - (const_string "imov") - ] - (const_string "lea"))) - (set (attr "length_immediate") - (cond [(eq_attr "type" "imov") - (const_string "0") - (and (eq_attr "type" "alu") - (match_operand 2 "const128_operand" "")) - (const_string "1") - ] - (const_string "*"))) - (set_attr "mode" "DI")]) - -(define_insn "pro_epilogue_adjust_stack_rex64_2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (plus:DI (match_operand:DI 1 "register_operand" "0,r") - (match_operand:DI 3 "immediate_operand" "i,i"))) - (use (match_operand:DI 2 "register_operand" "r,r")) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_ALU: - return "add{q}\t{%2, %0|%0, %2}"; - - case TYPE_LEA: - operands[2] = gen_rtx_PLUS (DImode, 
operands[1], operands[2]); - return "lea{q}\t{%a2, %0|%0, %a2}"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,lea") - (set_attr "mode" "DI")]) - -(define_insn "allocate_stack_worker_32" - [(set (match_operand:SI 0 "register_operand" "=a") - (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")] + "" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "allocate_stack_worker_probe_<mode>" + [(set (match_operand:P 0 "register_operand" "=a") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")] UNSPECV_STACK_PROBE)) - (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t___chkstk" - [(set_attr "type" "multi") - (set_attr "length" "5")]) - -(define_insn "allocate_stack_worker_64" - [(set (match_operand:DI 0 "register_operand" "=a") - (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")] - UNSPECV_STACK_PROBE)) - (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1))) - (clobber (reg:DI R10_REG)) - (clobber (reg:DI R11_REG)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && TARGET_STACK_PROBE" - "call\t___chkstk" + (clobber (reg:CC FLAGS_REG))] + "ix86_target_stack_probe ()" + "call\t___chkstk_ms" [(set_attr "type" "multi") (set_attr "length" "5")]) (define_expand "allocate_stack" [(match_operand 0 "register_operand" "") (match_operand 1 "general_operand" "")] - "TARGET_STACK_PROBE" + "ix86_target_stack_probe ()" { rtx x; @@ -17034,10 +16657,13 @@ { x = copy_to_mode_reg (Pmode, operands[1]); if (TARGET_64BIT) - x = gen_allocate_stack_worker_64 (x, x); - else - x = gen_allocate_stack_worker_32 (x, x); - emit_insn (x); + emit_insn (gen_allocate_stack_worker_probe_di (x, x)); + else + emit_insn (gen_allocate_stack_worker_probe_si (x, x)); + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x, + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != 
stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); } emit_move_insn (operands[0], virtual_stack_dynamic_rtx); @@ -17049,12 +16675,36 @@ [(match_operand 0 "memory_operand" "")] "" { - if (GET_MODE (operands[0]) == DImode) - emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx)); - else - emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx)); - DONE; -}) + rtx (*gen_ior3) (rtx, rtx, rtx); + + gen_ior3 = (GET_MODE (operands[0]) == DImode + ? gen_iordi3 : gen_iorsi3); + + emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx)); + DONE; +}) + +(define_insn "adjust_stack_and_probe<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")] + UNSPECV_PROBE_STACK_RANGE)) + (set (reg:P SP_REG) + (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "" + "* return output_adjust_stack_and_probe (operands[0]);" + [(set_attr "type" "multi")]) + +(define_insn "probe_stack_range<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "const_int_operand" "n")] + UNSPECV_PROBE_STACK_RANGE)) + (clobber (reg:CC FLAGS_REG))] + "" + "* return output_probe_stack_range (operands[0], operands[2]);" + [(set_attr "type" "multi")]) (define_expand "builtin_setjmp_receiver" [(label_ref (match_operand 0 "" ""))] @@ -17188,7 +16838,7 @@ (define_split [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "comparison_operator" + (if_then_else (match_operator 1 "ordered_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand 2 "register_operand" "") (match_operand 3 "register_operand" "")))] @@ -17202,33 +16852,21 @@ "operands[0] = gen_lowpart (SImode, operands[0]); operands[2] = gen_lowpart (SImode, operands[2]); operands[3] = gen_lowpart (SImode, operands[3]);") - ;; RTL Peephole optimizations, run 
before sched2. These primarily look to ;; transform a complex memory operation into two memory to register operations. ;; Don't push memory operands (define_peephole2 - [(set (match_operand:SI 0 "push_operand" "") - (match_operand:SI 1 "memory_operand" "")) - (match_scratch:SI 2 "r")] + [(set (match_operand:SWI 0 "push_operand" "") + (match_operand:SWI 1 "memory_operand" "")) + (match_scratch:SWI 2 "<r>")] "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(set (match_operand:DI 0 "push_operand" "") - (match_operand:DI 1 "memory_operand" "")) - (match_scratch:DI 2 "r")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -;; We need to handle SFmode only, because DFmode and XFmode is split to + (set (match_dup 0) (match_dup 2))]) + +;; We need to handle SFmode only, because DFmode and XFmode are split to ;; SImode pushes. 
(define_peephole2 [(set (match_operand:SF 0 "push_operand" "") @@ -17237,51 +16875,16 @@ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(set (match_operand:HI 0 "push_operand" "") - (match_operand:HI 1 "memory_operand" "")) - (match_scratch:HI 2 "r")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(set (match_operand:QI 0 "push_operand" "") - (match_operand:QI 1 "memory_operand" "")) - (match_scratch:QI 2 "q")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; Don't move an immediate directly to memory when the instruction ;; gets too big. (define_peephole2 - [(match_scratch:SI 1 "r") - (set (match_operand:SI 0 "memory_operand" "") + [(match_scratch:SWI124 1 "<r>") + (set (match_operand:SWI124 0 "memory_operand" "") (const_int 0))] "optimize_insn_for_speed_p () - && ! TARGET_USE_MOV0 - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 1) (const_int 0)) - (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 1))] - "") - -(define_peephole2 - [(match_scratch:HI 1 "r") - (set (match_operand:HI 0 "memory_operand" "") - (const_int 0))] - "optimize_insn_for_speed_p () - && ! 
TARGET_USE_MOV0 + && !TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" @@ -17291,51 +16894,14 @@ "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 - [(match_scratch:QI 1 "q") - (set (match_operand:QI 0 "memory_operand" "") - (const_int 0))] - "optimize_insn_for_speed_p () - && ! TARGET_USE_MOV0 - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 2) (const_int 0)) - (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_lowpart (SImode, operands[1]);") - -(define_peephole2 - [(match_scratch:SI 2 "r") - (set (match_operand:SI 0 "memory_operand" "") - (match_operand:SI 1 "immediate_operand" ""))] + [(match_scratch:SWI124 2 "<r>") + (set (match_operand:SWI124 0 "memory_operand" "") + (match_operand:SWI124 1 "immediate_operand" ""))] "optimize_insn_for_speed_p () && TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(match_scratch:HI 2 "r") - (set (match_operand:HI 0 "memory_operand" "") - (match_operand:HI 1 "immediate_operand" ""))] - "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(match_scratch:QI 2 "q") - (set (match_operand:QI 0 "memory_operand" "") - (match_operand:QI 1 "immediate_operand" ""))] - "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; Don't compare memory with zero, load and use a test instead. 
(define_peephole2 @@ -17346,8 +16912,7 @@ (match_scratch:SI 3 "r")] "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" [(set (match_dup 3) (match_dup 2)) - (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] - "") + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]) ;; NOT is not pairable on Pentium, while XOR is, but one byte longer. ;; Don't split NOTs with a displacement operand, because resulting XOR @@ -17361,46 +16926,18 @@ ;; lifetime information then. (define_peephole2 - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] - "optimize_insn_for_speed_p () - && ((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], SImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode))) - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) - (xor:SI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_peephole2 - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + [(set (match_operand:SWI124 0 "nonimmediate_operand" "") + (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand" "")))] "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], HImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode))) + && (!MEM_P (operands[0]) + || !memory_displacement_operand (operands[0], <MODE>mode))) + || (TARGET_NOT_VECTORMODE + && long_memory_operand (operands[0], <MODE>mode))) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) - (xor:HI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_peephole2 - [(set (match_operand:QI 0 "nonimmediate_operand" "") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] - "optimize_insn_for_speed_p () - && 
((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], QImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode))) - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) - (xor:QI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") + (xor:SWI124 (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])]) ;; Non pairable "test imm, reg" instructions can be translated to ;; "and imm, reg" if reg dies. The "and" form is also shorter (one @@ -17424,8 +16961,7 @@ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) (const_int 0)])) (set (match_dup 2) - (and:SI (match_dup 2) (match_dup 3)))])] - "") + (and:SI (match_dup 2) (match_dup 3)))])]) ;; We don't need to handle HImode case, because it will be promoted to SImode ;; on ! TARGET_PARTIAL_REG_STALL @@ -17445,8 +16981,7 @@ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) (const_int 0)])) (set (match_dup 2) - (and:QI (match_dup 2) (match_dup 3)))])] - "") + (and:QI (match_dup 2) (match_dup 3)))])]) (define_peephole2 [(set (match_operand 0 "flags_reg_operand" "") @@ -17479,8 +17014,7 @@ (match_dup 2) (const_int 8) (const_int 8)) - (match_dup 3)))])] - "") + (match_dup 3)))])]) ;; Don't do logical operations with memory inputs. (define_peephole2 @@ -17494,8 +17028,7 @@ [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 2)])) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(match_scratch:SI 2 "r") @@ -17508,8 +17041,7 @@ [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 2) (match_dup 0)])) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address ;; refers to the destination of the load! 
@@ -17543,8 +17075,7 @@ || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) - (match_op_dup 3 [(match_dup 0) (match_dup 1)]))] - "") + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]) ; Don't do logical operations with memory outputs ; @@ -17566,8 +17097,7 @@ (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_peephole2 [(match_scratch:SI 2 "r") @@ -17583,8 +17113,7 @@ (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; Attempt to always use XOR for zeroing registers. (define_peephole2 @@ -17596,9 +17125,7 @@ && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) (clobber (reg:CC FLAGS_REG))])] -{ - operands[0] = gen_lowpart (word_mode, operands[0]); -}) + "operands[0] = gen_lowpart (word_mode, operands[0]);") (define_peephole2 [(set (strict_low_part (match_operand 0 "register_operand" "")) @@ -17610,66 +17137,48 @@ [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) (clobber (reg:CC FLAGS_REG))])]) -;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. -(define_peephole2 - [(set (match_operand 0 "register_operand" "") +;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg. 
+(define_peephole2 + [(set (match_operand:SWI248 0 "register_operand" "") (const_int -1))] - "(GET_MODE (operands[0]) == HImode - || GET_MODE (operands[0]) == SImode - || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) + "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC FLAGS_REG))])] - "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, - operands[0]);") - -;; Attempt to convert simple leas to adds. These can be created by -;; move expanders. -(define_peephole2 - [(set (match_operand:SI 0 "register_operand" "") - (plus:SI (match_dup 0) - (match_operand:SI 1 "nonmemory_operand" "")))] +{ + if (GET_MODE_SIZE (<MODE>mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +;; Attempt to convert simple lea to add/shift. +;; These can be created by move expanders. 
+ +(define_peephole2 + [(set (match_operand:SWI48 0 "register_operand" "") + (plus:SWI48 (match_dup 0) + (match_operand:SWI48 1 "<nonmemory_operand>" "")))] "peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))])] - "") + [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "nonmemory_operand" "")) 0))] - "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])" + "TARGET_64BIT + && peep2_regno_dead_p (0, FLAGS_REG) + && REGNO (operands[0]) == REGNO (operands[1])" [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = gen_lowpart (SImode, operands[2]);") (define_peephole2 - [(set (match_operand:DI 0 "register_operand" "") - (plus:DI (match_dup 0) - (match_operand:DI 1 "x86_64_general_operand" "")))] - "peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_peephole2 - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_dup 0) - (match_operand:SI 1 "const_int_operand" "")))] + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_dup 0) + (match_operand:SWI48 1 "const_int_operand" "")))] "exact_log2 (INTVAL (operands[1])) >= 0 && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") - -(define_peephole2 - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_dup 0) - (match_operand:DI 1 "const_int_operand" "")))] - "exact_log2 (INTVAL (operands[1])) >= 0 - && peep2_regno_dead_p (0, FLAGS_REG)" - 
[(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2))) + [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") @@ -17677,7 +17186,8 @@ [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "const_int_operand" "")) 0))] - "exact_log2 (INTVAL (operands[2])) >= 0 + "TARGET_64BIT + && exact_log2 (INTVAL (operands[2])) >= 0 && REGNO (operands[0]) == REGNO (operands[1]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) @@ -17685,12 +17195,13 @@ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") ;; The ESP adjustments can be done by the push and pop instructions. Resulting -;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes. On -;; many CPUs it is also faster, since special hardware to avoid esp +;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes. +;; On many CPUs it is also faster, since special hardware to avoid esp ;; dependencies is present. -;; While some of these conversions may be done using splitters, we use peepholes -;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. +;; While some of these conversions may be done using splitters, we use +;; peepholes in order to allow combine_stack_adjustments pass to see +;; nonobfuscated RTL. ;; Convert prologue esp subtractions to push. ;; We need register to push. In order to keep verify_flow_info happy we have @@ -17704,119 +17215,131 @@ ;; alternative when no register is available later. 
(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" - [(clobber (match_dup 0)) - (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) + "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) (clobber (mem:BLK (scratch)))])]) (define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" - [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) - (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) + "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) (clobber (mem:BLK (scratch)))])]) ;; Convert esp subtractions to push. 
(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" - [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) - -(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" - [(clobber (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) - (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + [(clobber (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) ;; Convert epilogue deallocator to pop. 
(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) - (clobber (mem:BLK (scratch)))])] - "") - -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. -(define_peephole2 - [(match_scratch:SI 0 "r") - (match_scratch:SI 1 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + "(TARGET_SINGLE_POP || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (clobber (mem:BLK (scratch)))])]) + +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. 
+(define_peephole2 + [(match_scratch:P 1 "r") + (match_scratch:P 2 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ()) + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") - -(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + "optimize_insn_for_size_p () + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) ;; Convert esp additions to pop. 
(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") - -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. -(define_peephole2 - [(match_scratch:SI 0 "r") - (match_scratch:SI 1 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") - -(define_peephole2 - [(match_scratch:SI 0 "r") - (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. 
+(define_peephole2 + [(match_scratch:P 1 "r") + (match_scratch:P 2 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + +(define_peephole2 + [(match_scratch:P 1 "r") + (parallel [(set (reg:P SP_REG) + (plus:P (reg:P SP_REG) + (match_operand:P 0 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" + [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) ;; Convert compares with 1 to shorter inc/dec operations when CF is not ;; required and register dies. Similarly for 128 to -128. @@ -17833,144 +17356,28 @@ && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (match_dup 3)])) - (clobber (match_dup 2))])] - "") - -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" - [(clobber (match_dup 0)) - (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) - (clobber (mem:BLK (scratch)))])]) - -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" - [(clobber (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) - (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) - (clobber (mem:BLK (scratch)))])]) - -;; Convert esp subtractions to push. 
-(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" - [(clobber (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) - -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" - [(clobber (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) - (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) - -;; Convert epilogue deallocator to pop. -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))])] - "") - -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. 
-(define_peephole2 - [(match_scratch:DI 0 "r") - (match_scratch:DI 1 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") - -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) - (clobber (reg:CC FLAGS_REG)) - (clobber (mem:BLK (scratch)))])] - "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") - -;; Convert esp additions to pop. -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") - -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. 
-(define_peephole2 - [(match_scratch:DI 0 "r") - (match_scratch:DI 1 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") - -(define_peephole2 - [(match_scratch:DI 0 "r") - (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) - (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + (clobber (match_dup 2))])]) ;; Convert imul by three, five and nine into lea (define_peephole2 [(parallel - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "register_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "INTVAL (operands[2]) == 3 || INTVAL (operands[2]) == 5 || INTVAL (operands[2]) == 9" [(set (match_dup 0) - (plus:SI (mult:SI (match_dup 1) (match_dup 2)) - (match_dup 1)))] - { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2)) + (match_dup 1)))] + "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") (define_peephole2 [(parallel - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 
"nonimmediate_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "optimize_insn_for_speed_p () && (INTVAL (operands[2]) == 3 @@ -17978,69 +17385,23 @@ || INTVAL (operands[2]) == 9)" [(set (match_dup 0) (match_dup 1)) (set (match_dup 0) - (plus:SI (mult:SI (match_dup 0) (match_dup 2)) - (match_dup 0)))] - { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) - -(define_peephole2 - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "register_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT - && (INTVAL (operands[2]) == 3 - || INTVAL (operands[2]) == 5 - || INTVAL (operands[2]) == 9)" - [(set (match_dup 0) - (plus:DI (mult:DI (match_dup 1) (match_dup 2)) - (match_dup 1)))] - { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) - -(define_peephole2 - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT - && optimize_insn_for_speed_p () - && (INTVAL (operands[2]) == 3 - || INTVAL (operands[2]) == 5 - || INTVAL (operands[2]) == 9)" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 0) - (plus:DI (mult:DI (match_dup 0) (match_dup 2)) - (match_dup 0)))] - { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) - -;; Imul $32bit_imm, mem, reg is vector decoded, while + (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2)) + (match_dup 0)))] + "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") + +;; imul $32bit_imm, mem, reg is vector decoded, while ;; imul $32bit_imm, reg, reg is direct decoded. 
(define_peephole2 - [(match_scratch:DI 3 "r") - (parallel [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "memory_operand" "") - (match_operand:DI 2 "immediate_operand" ""))) + [(match_scratch:SWI48 3 "r") + (parallel [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "memory_operand" "") + (match_operand:SWI48 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) - (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] -"") - -(define_peephole2 - [(match_scratch:SI 3 "r") - (parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "memory_operand" "") - (match_operand:SI 2 "immediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () - && !satisfies_constraint_K (operands[2])" - [(set (match_dup 3) (match_dup 1)) - (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] -"") + (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(match_scratch:SI 3 "r") @@ -18049,58 +17410,29 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + "TARGET_64BIT + && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) - (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) - (clobber (reg:CC FLAGS_REG))])] -"") + (parallel [(set (match_dup 0) + (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])]) ;; imul $8/16bit_imm, regmem, reg 
is vector decoded. ;; Convert it into imul reg, reg ;; It would be better to force assembler to encode instruction using long ;; immediate, but there is apparently no way to do so. (define_peephole2 - [(parallel [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) + [(parallel [(set (match_operand:SWI248 0 "register_operand" "") + (mult:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "") + (match_operand:SWI248 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) - (match_scratch:DI 3 "r")] + (match_scratch:SWI248 3 "r")] "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) - (clobber (reg:CC FLAGS_REG))])] -{ - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); -}) - -(define_peephole2 - [(parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))]) - (match_scratch:SI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () - && satisfies_constraint_K (operands[2])" - [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) - (clobber (reg:CC FLAGS_REG))])] -{ - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); -}) - -(define_peephole2 - [(parallel [(set (match_operand:HI 0 "register_operand" "") - (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "immediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))]) - (match_scratch:HI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()" - [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) + (parallel 
[(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] { if (!rtx_equal_p (operands[0], operands[1])) @@ -18120,54 +17452,66 @@ ;; leal (%edx,%eax,4), %eax (define_peephole2 - [(parallel [(set (match_operand 0 "register_operand" "") + [(match_scratch:P 5 "r") + (parallel [(set (match_operand 0 "register_operand" "") (ashift (match_operand 1 "register_operand" "") (match_operand 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) - (set (match_operand 3 "register_operand") - (match_operand 4 "x86_64_general_operand" "")) - (parallel [(set (match_operand 5 "register_operand" "") - (plus (match_operand 6 "register_operand" "") - (match_operand 7 "register_operand" ""))) + (parallel [(set (match_operand 3 "register_operand" "") + (plus (match_dup 0) + (match_operand 4 "x86_64_general_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3 + "IN_RANGE (INTVAL (operands[2]), 1, 3) /* Validate MODE for lea. */ && ((!TARGET_PARTIAL_REG_STALL && (GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode)) || GET_MODE (operands[0]) == SImode || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + && (rtx_equal_p (operands[0], operands[3]) + || peep2_reg_dead_p (2, operands[0])) /* We reorder load and the shift. */ - && !rtx_equal_p (operands[1], operands[3]) - && !reg_overlap_mentioned_p (operands[0], operands[4]) - /* Last PLUS must consist of operand 0 and 3. */ - && !rtx_equal_p (operands[0], operands[3]) - && (rtx_equal_p (operands[3], operands[6]) - || rtx_equal_p (operands[3], operands[7])) - && (rtx_equal_p (operands[0], operands[6]) - || rtx_equal_p (operands[0], operands[7])) - /* The intermediate operand 0 must die or be same as output. 
*/ - && (rtx_equal_p (operands[0], operands[5]) - || peep2_reg_dead_p (3, operands[0]))" - [(set (match_dup 3) (match_dup 4)) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + [(set (match_dup 5) (match_dup 4)) (set (match_dup 0) (match_dup 1))] { - enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode; + enum machine_mode op1mode = GET_MODE (operands[1]); + enum machine_mode mode = op1mode == DImode ? DImode : SImode; int scale = 1 << INTVAL (operands[2]); rtx index = gen_lowpart (Pmode, operands[1]); - rtx base = gen_lowpart (Pmode, operands[3]); - rtx dest = gen_lowpart (mode, operands[5]); + rtx base = gen_lowpart (Pmode, operands[5]); + rtx dest = gen_lowpart (mode, operands[3]); operands[1] = gen_rtx_PLUS (Pmode, base, gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + operands[5] = base; if (mode != Pmode) operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + if (op1mode != Pmode) + operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0); operands[0] = dest; }) ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. 
+(define_insn_and_split "*call_value_pop_0_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_pop_0" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) @@ -18176,12 +17520,24 @@ (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "")))] "!TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_pop_1_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_pop_1" @@ -18192,11 +17548,24 @@ (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i")))] "!TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if (constant_call_address_operand (operands[1], Pmode)) - return "call\t%P1"; - return "call\t%A1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split 
"*sibcall_value_pop_1_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i")))]) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;" [(set_attr "type" "callv")]) (define_insn "*sibcall_value_pop_1" @@ -18207,9 +17576,20 @@ (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i,i")))] "!TARGET_64BIT && SIBLING_CALL_P (insn)" - "@ - jmp\t%P1 - jmp\t%A1" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_0_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_0" @@ -18217,12 +17597,20 @@ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) (match_operand:SI 2 "" "")))] "!TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_0_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && 
TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_0_rex64" @@ -18230,12 +17618,34 @@ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) (match_operand:DI 2 "const_int_operand" "")))] "TARGET_64BIT" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_0_rex64_ms_sysv" @@ -18256,12 +17666,20 @@ (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] "TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (unspec 
[(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_1" @@ -18269,11 +17687,20 @@ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) (match_operand:SI 2 "" "")))] "!TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if (constant_call_address_operand (operands[1], Pmode)) - return "call\t%P1"; - return "call\t%A1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*sibcall_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*sibcall_value_1" @@ -18281,9 +17708,21 @@ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) (match_operand:SI 2 "" "")))] "!TARGET_64BIT && SIBLING_CALL_P (insn)" - "@ - jmp\t%P1 - jmp\t%A1" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) 
(define_insn "*call_value_1_rex64" @@ -18292,11 +17731,34 @@ (match_operand:DI 2 "" "")))] "TARGET_64BIT && !SIBLING_CALL_P (insn) && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" -{ - if (constant_call_address_operand (operands[1], Pmode)) - return "call\t%P1"; - return "call\t%A1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_1_rex64_ms_sysv" @@ -18317,11 +17779,20 @@ (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] "TARGET_64BIT && !SIBLING_CALL_P (insn)" -{ - if (constant_call_address_operand (operands[1], Pmode)) - return "call\t%P1"; - return "call\t%A1"; -} + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*call_value_1_rex64_large_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P 
(insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*call_value_1_rex64_large" @@ -18329,7 +17800,20 @@ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) (match_operand:DI 2 "" "")))] "TARGET_64BIT && !SIBLING_CALL_P (insn)" - "call\t%A1" + { return ix86_output_call_insn (insn, operands[1], 1); } + [(set_attr "type" "callv")]) + +(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" [(set_attr "type" "callv")]) (define_insn "*sibcall_value_1_rex64" @@ -18337,9 +17821,7 @@ (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) (match_operand:DI 2 "" "")))] "TARGET_64BIT && SIBLING_CALL_P (insn)" - "@ - jmp\t%P1 - jmp\t%A1" + { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. 
@@ -18353,178 +17835,6 @@ { return ASM_SHORT "0x0b0f"; } [(set_attr "length" "2")]) -(define_expand "sse_prologue_save" - [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "immediate_operand" "")) - (use (label_ref:DI (match_operand 3 "" ""))) - (clobber (match_operand:DI 4 "register_operand" "")) - (use (match_dup 1))])] - "TARGET_64BIT" - "") - -;; Pre-reload version of prologue save. Until after prologue generation we don't know -;; what the size of save instruction will be. -;; Operand 0+operand 6 is the memory save area -;; Operand 1 is number of registers to save (will get overwritten to operand 5) -;; Operand 2 is number of non-vaargs SSE arguments -;; Operand 3 is label starting the save block -;; Operand 4 is used for temporary computation of jump address -(define_insn "*sse_prologue_save_insn1" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 6 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "=r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X"))) - (clobber (match_operand:DI 4 "register_operand" "=&r")) - (use (match_operand:DI 5 "register_operand" "1"))] - "TARGET_64BIT - && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128" - "#" - [(set_attr "type" "other") - (set_attr "memory" "store") - (set_attr "mode" "DI")]) - -;; We know 
size of save instruction; expand the computation of jump address -;; in the jumptable. -(define_split - [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "const_int_operand" "")) - (use (match_operand 3 "" "")) - (clobber (match_operand:DI 4 "register_operand" "")) - (use (match_operand:DI 5 "register_operand" ""))])] - "reload_completed" - [(parallel [(set (match_dup 0) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW)) - (use (match_dup 1)) - (use (match_dup 2)) - (use (match_dup 3)) - (use (match_dup 5))])] -{ - /* Movaps is 4 bytes, AVX and movsd is 5 bytes. */ - int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128); - - /* Compute address to jump to: - label - eax*size + nnamed_sse_arguments*size. 
*/ - if (size == 5) - emit_insn (gen_rtx_SET (VOIDmode, operands[4], - gen_rtx_PLUS - (Pmode, - gen_rtx_MULT (Pmode, operands[1], - GEN_INT (4)), - operands[1]))); - else if (size == 4) - emit_insn (gen_rtx_SET (VOIDmode, operands[4], - gen_rtx_MULT (Pmode, operands[1], - GEN_INT (4)))); - else - gcc_unreachable (); - if (INTVAL (operands[2])) - emit_move_insn - (operands[1], - gen_rtx_CONST (DImode, - gen_rtx_PLUS (DImode, - operands[3], - GEN_INT (INTVAL (operands[2]) - * size)))); - else - emit_move_insn (operands[1], operands[3]); - emit_insn (gen_subdi3 (operands[1], operands[1], operands[4])); - operands[5] = GEN_INT (size); -}) - -(define_insn "sse_prologue_save_insn" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW)) - (use (match_operand:DI 1 "register_operand" "r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X"))) - (use (match_operand:DI 5 "const_int_operand" "i"))] - "TARGET_64BIT - && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" -{ - int i; - operands[0] = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, operands[0], operands[4])); - /* VEX instruction with a REX prefix will #UD. 
*/ - if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS) - gcc_unreachable (); - - output_asm_insn ("jmp\t%A1", operands); - for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) - { - operands[4] = adjust_address (operands[0], DImode, i*16); - operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); - PUT_MODE (operands[4], TImode); - if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn ("rex", operands); - if (crtl->stack_alignment_needed < 128) - output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands); - else - output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); - } - (*targetm.asm_out.internal_label) (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[3])); - return ""; -} - [(set_attr "type" "other") - (set_attr "length_immediate" "0") - (set_attr "length_address" "0") - ;; 2 bytes for jump and opernds[4] bytes for each save. - (set (attr "length") - (plus (const_int 2) - (mult (symbol_ref ("INTVAL (operands[5])")) - (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])"))))) - (set_attr "memory" "store") - (set_attr "modrm" "0") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "DI")]) - (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") (match_operand:SI 1 "const_int_operand" "") @@ -18549,31 +17859,11 @@ operands[1] = const0_rtx; }) -(define_insn "*prefetch_sse" - [(prefetch (match_operand:SI 0 "address_operand" "p") +(define_insn "*prefetch_sse_<mode>" + [(prefetch (match_operand:P 0 "address_operand" "p") (const_int 0) (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && !TARGET_64BIT" -{ - static const char * const patterns[4] = { - "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" - }; - - int locality = INTVAL (operands[1]); - gcc_assert (locality >= 0 && locality <= 3); - - return patterns[locality]; -} - [(set_attr "type" "sse") - (set_attr "atom_sse_attr" "prefetch") - (set (attr "length_address") (symbol_ref "memory_address_length 
(operands[0])")) - (set_attr "memory" "none")]) - -(define_insn "*prefetch_sse_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (const_int 0) - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && TARGET_64BIT" + "TARGET_PREFETCH_SSE" { static const char * const patterns[4] = { "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" @@ -18586,29 +17876,15 @@ } [(set_attr "type" "sse") (set_attr "atom_sse_attr" "prefetch") - (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])")) + (set (attr "length_address") + (symbol_ref "memory_address_length (operands[0])")) (set_attr "memory" "none")]) -(define_insn "*prefetch_3dnow" - [(prefetch (match_operand:SI 0 "address_operand" "p") +(define_insn "*prefetch_3dnow_<mode>" + [(prefetch (match_operand:P 0 "address_operand" "p") (match_operand:SI 1 "const_int_operand" "n") (const_int 3))] - "TARGET_3DNOW && !TARGET_64BIT" -{ - if (INTVAL (operands[1]) == 0) - return "prefetch\t%a0"; - else - return "prefetchw\t%a0"; -} - [(set_attr "type" "mmx") - (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])")) - (set_attr "memory" "none")]) - -(define_insn "*prefetch_3dnow_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 3))] - "TARGET_3DNOW && TARGET_64BIT" + "TARGET_3DNOW" { if (INTVAL (operands[1]) == 0) return "prefetch\t%a0"; @@ -18616,7 +17892,8 @@ return "prefetchw\t%a0"; } [(set_attr "type" "mmx") - (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])")) + (set (attr "length_address") + (symbol_ref "memory_address_length (operands[0])")) (set_attr "memory" "none")]) (define_expand "stack_protect_set" @@ -18624,64 +17901,40 @@ (match_operand 1 "memory_operand" "")] "" { + rtx (*insn)(rtx, rtx); + #ifdef TARGET_THREAD_SSP_OFFSET - if (TARGET_64BIT) - emit_insn (gen_stack_tls_protect_set_di (operands[0], - GEN_INT 
(TARGET_THREAD_SSP_OFFSET))); - else - emit_insn (gen_stack_tls_protect_set_si (operands[0], - GEN_INT (TARGET_THREAD_SSP_OFFSET))); + operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET); + insn = (TARGET_64BIT + ? gen_stack_tls_protect_set_di + : gen_stack_tls_protect_set_si); #else - if (TARGET_64BIT) - emit_insn (gen_stack_protect_set_di (operands[0], operands[1])); - else - emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); + insn = (TARGET_64BIT + ? gen_stack_protect_set_di + : gen_stack_protect_set_si); #endif - DONE; -}) - -(define_insn "stack_protect_set_si" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) - (set (match_scratch:SI 2 "=&r") (const_int 0)) - (clobber (reg:CC FLAGS_REG))] - "" - "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + + emit_insn (insn (operands[0], operands[1])); + DONE; +}) + +(define_insn "stack_protect_set_<mode>" + [(set (match_operand:P 0 "memory_operand" "=m") + (unspec:P [(match_operand:P 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:P 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" [(set_attr "type" "multi")]) -(define_insn "stack_protect_set_di" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) - (set (match_scratch:DI 2 "=&r") (const_int 0)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" - [(set_attr "type" "multi")]) - -(define_insn "stack_tls_protect_set_si" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) - (set (match_scratch:SI 2 "=&r") (const_int 0)) - (clobber (reg:CC FLAGS_REG))] - "" - "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, 
%2}\;xor{l}\t%2, %2" - [(set_attr "type" "multi")]) - -(define_insn "stack_tls_protect_set_di" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) - (set (match_scratch:DI 2 "=&r") (const_int 0)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - { - /* The kernel uses a different segment register for performance reasons; a - system call would not have to trash the userspace segment register, - which would be expensive */ - if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; - else - return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; - } +(define_insn "stack_tls_protect_set_<mode>" + [(set (match_operand:P 0 "memory_operand" "=m") + (unspec:P [(match_operand:P 1 "const_int_operand" "i")] + UNSPEC_SP_TLS_SET)) + (set (match_scratch:P 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" [(set_attr "type" "multi")]) (define_expand "stack_protect_test" @@ -18692,71 +17945,44 @@ { rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + rtx (*insn)(rtx, rtx, rtx); + #ifdef TARGET_THREAD_SSP_OFFSET - if (TARGET_64BIT) - emit_insn (gen_stack_tls_protect_test_di (flags, operands[0], - GEN_INT (TARGET_THREAD_SSP_OFFSET))); - else - emit_insn (gen_stack_tls_protect_test_si (flags, operands[0], - GEN_INT (TARGET_THREAD_SSP_OFFSET))); + operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET); + insn = (TARGET_64BIT + ? gen_stack_tls_protect_test_di + : gen_stack_tls_protect_test_si); #else - if (TARGET_64BIT) - emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1])); - else - emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1])); + insn = (TARGET_64BIT + ? 
gen_stack_protect_test_di + : gen_stack_protect_test_si); #endif + emit_insn (insn (flags, operands[0], operands[1])); + emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx), flags, const0_rtx, operands[2])); DONE; }) -(define_insn "stack_protect_test_si" - [(set (match_operand:CCZ 0 "flags_reg_operand" "") - (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") - (match_operand:SI 2 "memory_operand" "m")] - UNSPEC_SP_TEST)) - (clobber (match_scratch:SI 3 "=&r"))] - "" - "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}" - [(set_attr "type" "multi")]) - -(define_insn "stack_protect_test_di" - [(set (match_operand:CCZ 0 "flags_reg_operand" "") - (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") - (match_operand:DI 2 "memory_operand" "m")] - UNSPEC_SP_TEST)) - (clobber (match_scratch:DI 3 "=&r"))] - "TARGET_64BIT" - "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}" - [(set_attr "type" "multi")]) - -(define_insn "stack_tls_protect_test_si" +(define_insn "stack_protect_test_<mode>" [(set (match_operand:CCZ 0 "flags_reg_operand" "") - (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") - (match_operand:SI 2 "const_int_operand" "i")] - UNSPEC_SP_TLS_TEST)) - (clobber (match_scratch:SI 3 "=r"))] - "" - "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}" + (unspec:CCZ [(match_operand:P 1 "memory_operand" "m") + (match_operand:P 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:P 3 "=&r"))] + "" + "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}" [(set_attr "type" "multi")]) -(define_insn "stack_tls_protect_test_di" +(define_insn "stack_tls_protect_test_<mode>" [(set (match_operand:CCZ 0 "flags_reg_operand" "") - (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") - (match_operand:DI 2 "const_int_operand" "i")] + (unspec:CCZ [(match_operand:P 1 "memory_operand" "m") + (match_operand:P 2 "const_int_operand" "i")] UNSPEC_SP_TLS_TEST)) - (clobber (match_scratch:DI 3 "=r"))] - 
"TARGET_64BIT" - { - /* The kernel uses a different segment register for performance reasons; a - system call would not have to trash the userspace segment register, - which would be expensive */ - if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}"; - else - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}"; - } + (clobber (match_scratch:P 3 "=r"))] + "" + "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}" [(set_attr "type" "multi")]) (define_insn "sse4_2_crc32<mode>" @@ -18965,8 +18191,7 @@ (define_expand "lwp_llwpcb" [(unspec_volatile [(match_operand 0 "register_operand" "r")] UNSPECV_LLWP_INTRINSIC)] - "TARGET_LWP" - "") + "TARGET_LWP") (define_insn "*lwp_llwpcb<mode>1" [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] @@ -18981,13 +18206,13 @@ [(set (match_operand 0 "register_operand" "=r") (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] "TARGET_LWP" - { - if (TARGET_64BIT) - emit_insn (gen_lwp_slwpcbdi (operands[0])); - else - emit_insn (gen_lwp_slwpcbsi (operands[0])); - DONE; - }) +{ + if (TARGET_64BIT) + emit_insn (gen_lwp_slwpcbdi (operands[0])); + else + emit_insn (gen_lwp_slwpcbsi (operands[0])); + DONE; +}) (define_insn "lwp_slwpcb<mode>" [(set (match_operand:P 0 "register_operand" "=r") @@ -19027,8 +18252,7 @@ UNSPECV_LWPINS_INTRINSIC)) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))] - "TARGET_LWP" - "") + "TARGET_LWP") (define_insn "*lwp_lwpins<mode>3_1" [(set (reg:CCC FLAGS_REG) @@ -19043,6 +18267,48 @@ (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 9"))]) +(define_insn "rdfsbase<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDFSBASE))] + "TARGET_64BIT && TARGET_FSGSBASE" + "rdfsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + 
+(define_insn "rdgsbase<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDGSBASE))] + "TARGET_64BIT && TARGET_FSGSBASE" + "rdgsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "wrfsbase<mode>" + [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")] + UNSPECV_WRFSBASE)] + "TARGET_64BIT && TARGET_FSGSBASE" + "wrfsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "wrgsbase<mode>" + [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")] + UNSPECV_WRGSBASE)] + "TARGET_64BIT && TARGET_FSGSBASE" + "wrgsbase %0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "2")]) + +(define_insn "rdrand<mode>_1" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (unspec:SWI248 [(const_int 0)] UNSPEC_RDRAND)) + (set (reg:CCC FLAGS_REG) + (unspec:CCC [(const_int 0)] UNSPEC_RDRAND))] + "TARGET_RDRND" + "rdrand\t%0" + [(set_attr "type" "other") + (set_attr "prefix_extra" "1")]) + (include "mmx.md") (include "sse.md") (include "sync.md")