Mercurial > hg > CbC > CbC_gcc
diff gcc/config/i386/i386.md @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | ab0bcb71f44d 84e7813d76e9 |
children | 351920fa3827 |
line wrap: on
line diff
--- a/gcc/config/i386/i386.md Thu Oct 25 08:08:40 2018 +0900 +++ b/gcc/config/i386/i386.md Thu Oct 25 10:21:07 2018 +0900 @@ -1,5 +1,5 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988-2017 Free Software Foundation, Inc. +;; Copyright (C) 1988-2018 Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka ;; @@ -62,7 +62,7 @@ ;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode -;; ! -- print MPX or NOTRACK prefix for jxx/call/ret instructions if required. +;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required. (define_c_enum "unspec" [ ;; Relocation specifiers @@ -99,9 +99,9 @@ UNSPEC_SCAS UNSPEC_FNSTSW UNSPEC_SAHF + UNSPEC_NOTRAP UNSPEC_PARITY UNSPEC_FSTCW - UNSPEC_FLDCW UNSPEC_REP UNSPEC_LD_MPIC ; load_macho_picbase UNSPEC_TRUNC_NOOP @@ -143,7 +143,6 @@ UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_TRUNC - UNSPEC_FRNDINT_MASK_PM UNSPEC_FIST_FLOOR UNSPEC_FIST_CEIL @@ -183,16 +182,6 @@ UNSPEC_PDEP UNSPEC_PEXT - UNSPEC_BNDMK - UNSPEC_BNDMK_ADDR - UNSPEC_BNDSTX - UNSPEC_BNDLDX - UNSPEC_BNDLDX_ADDR - UNSPEC_BNDCL - UNSPEC_BNDCU - UNSPEC_BNDCN - UNSPEC_MPX_FENCE - ;; IRET support UNSPEC_INTERRUPT_RETURN ]) @@ -236,6 +225,8 @@ UNSPECV_XSAVEC64 UNSPECV_XGETBV UNSPECV_XSETBV + UNSPECV_WBINVD + UNSPECV_WBNOINVD ;; For atomic compound assignments. 
UNSPECV_FNSTENV @@ -286,6 +277,21 @@ UNSPECV_WRUSS UNSPECV_SETSSBSY UNSPECV_CLRSSBSY + + ;; For MOVDIRI and MOVDIR64B support + UNSPECV_MOVDIRI + UNSPECV_MOVDIR64B + + ;; For WAITPKG support + UNSPECV_UMWAIT + UNSPECV_UMONITOR + UNSPECV_TPAUSE + + ;; For CLDEMOTE support + UNSPECV_CLDEMOTE + + ;; For Speculation Barrier support + UNSPECV_SPECULATION_BARRIER ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -352,69 +358,64 @@ (ARGP_REG 16) (FLAGS_REG 17) (FPSR_REG 18) - (FPCR_REG 19) - (FRAME_REG 20) - (XMM0_REG 21) - (XMM1_REG 22) - (XMM2_REG 23) - (XMM3_REG 24) - (XMM4_REG 25) - (XMM5_REG 26) - (XMM6_REG 27) - (XMM7_REG 28) - (MM0_REG 29) - (MM1_REG 30) - (MM2_REG 31) - (MM3_REG 32) - (MM4_REG 33) - (MM5_REG 34) - (MM6_REG 35) - (MM7_REG 36) - (R8_REG 37) - (R9_REG 38) - (R10_REG 39) - (R11_REG 40) - (R12_REG 41) - (R13_REG 42) - (R14_REG 43) - (R15_REG 44) - (XMM8_REG 45) - (XMM9_REG 46) - (XMM10_REG 47) - (XMM11_REG 48) - (XMM12_REG 49) - (XMM13_REG 50) - (XMM14_REG 51) - (XMM15_REG 52) - (XMM16_REG 53) - (XMM17_REG 54) - (XMM18_REG 55) - (XMM19_REG 56) - (XMM20_REG 57) - (XMM21_REG 58) - (XMM22_REG 59) - (XMM23_REG 60) - (XMM24_REG 61) - (XMM25_REG 62) - (XMM26_REG 63) - (XMM27_REG 64) - (XMM28_REG 65) - (XMM29_REG 66) - (XMM30_REG 67) - (XMM31_REG 68) - (MASK0_REG 69) - (MASK1_REG 70) - (MASK2_REG 71) - (MASK3_REG 72) - (MASK4_REG 73) - (MASK5_REG 74) - (MASK6_REG 75) - (MASK7_REG 76) - (BND0_REG 77) - (BND1_REG 78) - (BND2_REG 79) - (BND3_REG 80) - (FIRST_PSEUDO_REG 81) + (FRAME_REG 19) + (XMM0_REG 20) + (XMM1_REG 21) + (XMM2_REG 22) + (XMM3_REG 23) + (XMM4_REG 24) + (XMM5_REG 25) + (XMM6_REG 26) + (XMM7_REG 27) + (MM0_REG 28) + (MM1_REG 29) + (MM2_REG 30) + (MM3_REG 31) + (MM4_REG 32) + (MM5_REG 33) + (MM6_REG 34) + (MM7_REG 35) + (R8_REG 36) + (R9_REG 37) + (R10_REG 38) + (R11_REG 39) + (R12_REG 40) + (R13_REG 41) + (R14_REG 42) + (R15_REG 43) + (XMM8_REG 44) + (XMM9_REG 45) + (XMM10_REG 46) + (XMM11_REG 47) + (XMM12_REG 48) + 
(XMM13_REG 49) + (XMM14_REG 50) + (XMM15_REG 51) + (XMM16_REG 52) + (XMM17_REG 53) + (XMM18_REG 54) + (XMM19_REG 55) + (XMM20_REG 56) + (XMM21_REG 57) + (XMM22_REG 58) + (XMM23_REG 59) + (XMM24_REG 60) + (XMM25_REG 61) + (XMM26_REG 62) + (XMM27_REG 63) + (XMM28_REG 64) + (XMM29_REG 65) + (XMM30_REG 66) + (XMM31_REG 67) + (MASK0_REG 68) + (MASK1_REG 69) + (MASK2_REG 70) + (MASK3_REG 71) + (MASK4_REG 72) + (MASK5_REG 73) + (MASK6_REG 74) + (MASK7_REG 75) + (FIRST_PSEUDO_REG 76) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -428,7 +429,7 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, - atom,slm,haswell,generic,amdfam10,bdver1,bdver2,bdver3, + atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3, bdver4,btver2,znver1" (const (symbol_ref "ix86_schedule"))) @@ -449,8 +450,7 @@ ssecvt,ssecvt1,sseicvt,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg, lwp,mskmov,msklog, - mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft, - mpxmov,mpxmk,mpxchk,mpxld,mpxst" + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn @@ -479,8 +479,7 @@ ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, - bitmanip,imulx,msklog,mskmov,mpxmk,mpxmov,mpxchk, - mpxld,mpxst") + bitmanip,imulx,msklog,mskmov") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) @@ -535,17 +534,13 @@ (const_int 0) (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) (const_int 1) - (and (eq_attr "type" "ibr,call,callv") - (match_test "ix86_bnd_prefixed_insn_p (insn)")) - (const_int 1) ] (const_int 0))) ;; Set when 0f opcode prefix is used. 
(define_attr "prefix_0f" "" (if_then_else - (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov, - mpxmk,mpxmov,mpxchk,mpxld,mpxst") + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov") (eq_attr "unit" "sse,mmx")) (const_int 1) (const_int 0))) @@ -581,9 +576,6 @@ ] (const_int 0))) -;; Set when BND opcode prefix may be used. -(define_attr "maybe_prefix_bnd" "" (const_int 0)) - ;; Prefix used: original, VEX or maybe VEX. (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex" (cond [(eq_attr "mode" "OI,V8SF,V4DF") @@ -651,19 +643,6 @@ ] (const_int 1))) -(define_attr "modrm_class" "none,incdec,op0,op01,op02,pushpop,unknown" - (cond [(eq_attr "modrm" "0") - (const_string "none") - (eq_attr "type" "alu,imul,ishift") - (const_string "op02") - (eq_attr "type" "imov,imovx,lea,alu1,icmp") - (const_string "op01") - (eq_attr "type" "incdec") - (const_string "incdec") - (eq_attr "type" "push,pop") - (const_string "pushpop")] - (const_string "unknown"))) - ;; The (bounding maximum) length of an instruction in bytes. ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. 
;; Later we may want to split them and compute proper length as for @@ -710,16 +689,12 @@ (define_attr "memory" "none,load,store,both,unknown" (cond [(eq_attr "type" "other,multi,str,lwp") (const_string "unknown") - (eq_attr "type" "lea,fcmov,fpspc,mpxmk,mpxchk") + (eq_attr "type" "lea,fcmov,fpspc") (const_string "none") (eq_attr "type" "fistp,leave") (const_string "both") (eq_attr "type" "frndint") (const_string "load") - (eq_attr "type" "mpxld") - (const_string "load") - (eq_attr "type" "mpxst") - (const_string "store") (eq_attr "type" "push") (if_then_else (match_operand 1 "memory_operand") (const_string "both") @@ -749,7 +724,7 @@ (if_then_else (match_operand 1 "constant_call_address_operand") (const_string "none") (const_string "load")) - (and (eq_attr "type" "alu1,negnot,ishift1,sselog1,sseshuf1") + (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1") (match_operand 1 "memory_operand")) (const_string "both") (and (match_operand 0 "memory_operand") @@ -760,12 +735,12 @@ (match_operand 1 "memory_operand") (const_string "load") (and (eq_attr "type" - "!alu1,negnot,ishift1, + "!alu1,negnot,ishift1,rotate1, imov,imovx,icmp,test,bitmanip, fmov,fcmp,fsgn, sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1, - mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog,mpxmov") + mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog") (match_operand 2 "memory_operand")) (const_string "load") (and (eq_attr "type" "icmov,ssemuladd,sse4arg") @@ -797,7 +772,7 @@ ;; Defines rounding mode of an FP operation. -(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" +(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any" (const_string "any")) ;; Define attribute to classify add/sub insns that consumes carry flag (CF) @@ -807,7 +782,7 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. 
-(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, +(define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq, @@ -816,6 +791,8 @@ (define_attr "enabled" "" (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT") + (eq_attr "isa" "x64_sse2") + (symbol_ref "TARGET_64BIT && TARGET_SSE2") (eq_attr "isa" "x64_sse4") (symbol_ref "TARGET_64BIT && TARGET_SSE4_1") (eq_attr "isa" "x64_sse4_noavx") @@ -943,7 +920,7 @@ (define_code_iterator absneg [abs neg]) ;; Base name for x87 insn mnemonic. -(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")]) +(define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")]) ;; Used in signed and unsigned widening multiplications. (define_code_iterator any_extend [sign_extend zero_extend]) @@ -964,10 +941,14 @@ ;; Used in signed and unsigned fix. (define_code_iterator any_fix [fix unsigned_fix]) (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")]) +(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")]) +(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")]) ;; Used in signed and unsigned float. (define_code_iterator any_float [float unsigned_float]) (define_code_attr floatsuffix [(float "") (unsigned_float "u")]) +(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")]) +(define_code_attr floatprefix [(float "s") (unsigned_float "u")]) ;; All integer modes. (define_mode_iterator SWI1248x [QI HI SI DI]) @@ -1049,21 +1030,6 @@ (define_mode_iterator DWIH [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) -;; Bound modes. -(define_mode_iterator BND [(BND32 "!TARGET_LP64") - (BND64 "TARGET_LP64")]) - -;; Pointer mode corresponding to bound mode. 
-(define_mode_attr bnd_ptr [(BND32 "SI") (BND64 "DI")]) - -;; MPX check types -(define_int_iterator BNDCHECK [UNSPEC_BNDCL UNSPEC_BNDCU UNSPEC_BNDCN]) - -;; Check name -(define_int_attr bndcheck [(UNSPEC_BNDCL "cl") - (UNSPEC_BNDCU "cu") - (UNSPEC_BNDCN "cn")]) - ;; Instruction suffix for integer modes. (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) @@ -1195,6 +1161,7 @@ ;; Instruction suffix for REX 64bit operators. (define_mode_attr rex64suffix [(SI "") (DI "{q}")]) +(define_mode_attr rex64namesuffix [(SI "") (DI "q")]) ;; This mode iterator allows :P to be used for patterns that operate on ;; pointer-sized quantities. Exactly one of the two alternatives will match. @@ -1223,6 +1190,7 @@ (include "geode.md") (include "atom.md") (include "slm.md") +(include "glm.md") (include "core2.md") (include "haswell.md") @@ -1274,6 +1242,25 @@ (compare:CC (match_operand:SWI48 0 "nonimmediate_operand") (match_operand:SWI48 1 "<general_operand>")))]) +(define_mode_iterator SWI1248_AVX512BWDQ2_64 + [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ") + (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")]) + +(define_insn "*cmp<mode>_ccz_1" + [(set (reg FLAGS_REG) + (compare (match_operand:SWI1248_AVX512BWDQ2_64 0 + "nonimmediate_operand" "<r>,?m<r>,$k") + (match_operand:SWI1248_AVX512BWDQ2_64 1 "const0_operand")))] + "ix86_match_ccmode (insn, CCZmode)" + "@ + test{<imodesuffix>}\t%0, %0 + cmp{<imodesuffix>}\t{%1, %0|%0, %1} + ktest<mskmodesuffix>\t%0, %0" + [(set_attr "type" "test,icmp,msklog") + (set_attr "length_immediate" "0,1,*") + (set_attr "prefix" "*,*,vex") + (set_attr "mode" "<MODE>")]) + (define_insn "*cmp<mode>_ccno_1" [(set (reg FLAGS_REG) (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>") @@ -1284,7 +1271,6 @@ cmp{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "test,icmp") (set_attr "length_immediate" "0,1") - (set_attr "modrm_class" "op0,unknown") (set_attr "mode" "<MODE>")]) (define_insn "*cmp<mode>_1" @@ -1475,55 
+1461,18 @@ DONE; }) - ;; FP compares, step 1: -;; Set the FP condition codes. -;; -;; CCFPmode compare with exceptions -;; CCFPUmode compare with no exceptions - -;; We may not use "#" to split and emit these, since the REG_DEAD notes -;; used to manage the reg stack popping would not be preserved. - -(define_insn "*cmp<mode>_0_i387" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI - [(compare:CCFP - (match_operand:X87MODEF 1 "register_operand" "f") - (match_operand:X87MODEF 2 "const0_operand"))] - UNSPEC_FNSTSW))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, false, false);" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "<MODE>")]) - -(define_insn_and_split "*cmp<mode>_0_cc_i387" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP - (match_operand:X87MODEF 1 "register_operand" "f") - (match_operand:X87MODEF 2 "const0_operand"))) - (clobber (match_operand:HI 0 "register_operand" "=a"))] - "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" - "#" - "&& reload_completed" - [(set (match_dup 0) - (unspec:HI - [(compare:CCFP (match_dup 1)(match_dup 2))] - UNSPEC_FNSTSW)) - (set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] - "" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "<MODE>")]) +;; Set the FP condition codes and move fpsr to ax. + +;; We may not use "#" to split and emit these +;; due to reg-stack pops killing fpsr. 
(define_insn "*cmpxf_i387" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))] + (match_operand:XF 2 "reg_or_0_operand" "fC"))] UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, false, false);" @@ -1531,32 +1480,12 @@ (set_attr "unit" "i387") (set_attr "mode" "XF")]) -(define_insn_and_split "*cmpxf_cc_i387" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP - (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))) - (clobber (match_operand:HI 0 "register_operand" "=a"))] - "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" - "#" - "&& reload_completed" - [(set (match_dup 0) - (unspec:HI - [(compare:CCFP (match_dup 1)(match_dup 2))] - UNSPEC_FNSTSW)) - (set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] - "" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "XF")]) - (define_insn "*cmp<mode>_i387" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand:MODEF 1 "register_operand" "f") - (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] + (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))] UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, false, false);" @@ -1564,66 +1493,13 @@ (set_attr "unit" "i387") (set_attr "mode" "<MODE>")]) -(define_insn_and_split "*cmp<mode>_cc_i387" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP - (match_operand:MODEF 1 "register_operand" "f") - (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) - (clobber (match_operand:HI 0 "register_operand" "=a"))] - "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" - "#" - "&& reload_completed" - [(set (match_dup 0) - (unspec:HI - [(compare:CCFP (match_dup 1)(match_dup 2))] - UNSPEC_FNSTSW)) - (set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] - "" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr 
"mode" "<MODE>")]) - -(define_insn "*cmpu<mode>_i387" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI - [(compare:CCFPU - (match_operand:X87MODEF 1 "register_operand" "f") - (match_operand:X87MODEF 2 "register_operand" "f"))] - UNSPEC_FNSTSW))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, false, true);" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "<MODE>")]) - -(define_insn_and_split "*cmpu<mode>_cc_i387" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU - (match_operand:X87MODEF 1 "register_operand" "f") - (match_operand:X87MODEF 2 "register_operand" "f"))) - (clobber (match_operand:HI 0 "register_operand" "=a"))] - "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" - "#" - "&& reload_completed" - [(set (match_dup 0) - (unspec:HI - [(compare:CCFPU (match_dup 1)(match_dup 2))] - UNSPEC_FNSTSW)) - (set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] - "" - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "<MODE>")]) - (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand:X87MODEF 1 "register_operand" "f") (float:X87MODEF - (match_operand:SWI24 2 "memory_operand" "m")))] + (match_operand:SWI24 2 "nonimmediate_operand" "m")))] UNSPEC_FNSTSW))] "TARGET_80387 && (TARGET_USE_<SWI24:MODE>MODE_FIOP @@ -1634,45 +1510,22 @@ (set_attr "fp_int_src" "true") (set_attr "mode" "<SWI24:MODE>")]) -(define_insn_and_split "*cmp<X87MODEF:mode>_<SWI24:mode>_cc_i387" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP - (match_operand:X87MODEF 1 "register_operand" "f") - (float:X87MODEF - (match_operand:SWI24 2 "memory_operand" "m")))) - (clobber (match_operand:HI 0 "register_operand" "=a"))] - "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE - && (TARGET_USE_<SWI24:MODE>MODE_FIOP - || optimize_function_for_size_p (cfun))" - "#" - "&& reload_completed" - [(set (match_dup 0) +(define_insn "*cmpu<mode>_i387" + 
[(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI - [(compare:CCFP - (match_dup 1) - (float:X87MODEF (match_dup 2)))] - UNSPEC_FNSTSW)) - (set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] - "" + [(unspec:CCFP + [(compare:CCFP + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "register_operand" "f"))] + UNSPEC_NOTRAP)] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, false, true);" [(set_attr "type" "multi") (set_attr "unit" "i387") - (set_attr "fp_int_src" "true") - (set_attr "mode" "<SWI24:MODE>")]) - -;; FP compares, step 2 -;; Move the fpsw to ax. - -(define_insn "x86_fnstsw_1" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] - "TARGET_80387" - "fnstsw\t%0" - [(set_attr "length" "2") - (set_attr "mode" "SI") - (set_attr "unit" "i387")]) - -;; FP compares, step 3 + (set_attr "mode" "<MODE>")]) + +;; FP compares, step 2: ;; Get ax into flags, general case. (define_insn "x86_sahf_1" @@ -1694,23 +1547,45 @@ (set_attr "bdver1_decode" "direct") (set_attr "mode" "SI")]) -;; Pentium Pro can do steps 1 through 3 in one go. +;; Pentium Pro can do both steps in one go. 
;; (these instructions set flags directly) -(define_mode_iterator FPCMP [CCFP CCFPU]) -(define_mode_attr unord [(CCFP "") (CCFPU "u")]) - -(define_insn "*cmpi<FPCMP:unord><MODEF:mode>" - [(set (reg:FPCMP FLAGS_REG) - (compare:FPCMP +(define_subst_attr "unord" "unord_subst" "" "u") +(define_subst_attr "unordered" "unord_subst" "false" "true") + +(define_subst "unord_subst" + [(set (match_operand:CCFP 0) + (match_operand:CCFP 1))] + "" + [(set (match_dup 0) + (unspec:CCFP + [(match_dup 1)] + UNSPEC_NOTRAP))]) + +(define_insn "*cmpi<unord>xf_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 0 "register_operand" "f") + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE" + "* return output_fp_compare (insn, operands, true, <unordered>);" + [(set_attr "type" "fcmp") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double") + (set_attr "znver1_decode" "double")]) + +(define_insn "*cmpi<unord><MODEF:mode>" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand:MODEF 0 "register_operand" "f,v") (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))] "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) || (TARGET_80387 && TARGET_CMOVE)" "@ - * return output_fp_compare (insn, operands, true, \ - <FPCMP:MODE>mode == CCFPUmode); - %v<FPCMP:unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}" + * return output_fp_compare (insn, operands, true, <unordered>); + %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "fcmp,ssecomi") (set_attr "prefix" "orig,maybe_vex") (set_attr "mode" "<MODEF:MODE>") @@ -1737,21 +1612,6 @@ (eq_attr "alternative" "0") (symbol_ref "true") (symbol_ref "false"))))]) - -(define_insn "*cmpi<unord>xf_i387" - [(set (reg:FPCMP FLAGS_REG) - (compare:FPCMP - (match_operand:XF 0 "register_operand" "f") - (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE" - "* return 
output_fp_compare (insn, operands, true, - <MODE>mode == CCFPUmode);" - [(set_attr "type" "fcmp") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "direct") - (set_attr "bdver1_decode" "double") - (set_attr "znver1_decode" "double")]) ;; Push/pop instructions. @@ -1989,7 +1849,6 @@ "reload_completed" "xor{l}\t%k0, %k0" [(set_attr "type" "alu1") - (set_attr "modrm_class" "op0") (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) @@ -2013,7 +1872,7 @@ switch (get_attr_type (insn)) { case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: if (misaligned_operand (operands[0], XImode) @@ -2040,7 +1899,7 @@ switch (get_attr_type (insn)) { case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: if (misaligned_operand (operands[0], OImode) @@ -2086,7 +1945,7 @@ (define_insn "*movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd") - (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Ye,r"))] + (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,r"))] "(TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))) || (TARGET_SSE @@ -2100,7 +1959,7 @@ return "#"; case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: /* TDmode values are passed as TImode on the stack. 
Moving them @@ -2166,12 +2025,19 @@ (match_test "optimize_function_for_size_p (cfun)") (const_string "V4SF") ] - (const_string "TI")))]) + (const_string "TI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "6") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "7") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) (define_split [(set (match_operand:TI 0 "sse_reg_operand") (match_operand:TI 1 "general_reg_operand"))] - "TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC + "TARGET_64BIT && TARGET_SSE4_1 && reload_completed" [(set (match_dup 2) (vec_merge:V2DI @@ -2188,9 +2054,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yd,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Ye,r ,*Yj,r ,*Yj ,*Yn ,*r,*km,*k,*k"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,v,*Yd,r ,*v,r ,*x ,*y ,*r,*km,*k,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2212,7 +2078,7 @@ return "movq\t{%1, %0|%0, %1}"; case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: switch (get_attr_mode (insn)) @@ -2223,10 +2089,13 @@ && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) return "%vmovd\t{%1, %0|%0, %1}"; return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_TI: + /* Handle AVX512 registers set. 
*/ + if (EXT_REX_SSE_REG_P (operands[0]) + || EXT_REX_SSE_REG_P (operands[1])) + return "vmovdqa64\t{%1, %0|%0, %1}"; return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_XI: - return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; case MODE_V2SF: gcc_assert (!TARGET_AVX); @@ -2265,8 +2134,12 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1,17,18") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,19,20,23,25") + (eq_attr "alternative" "2,3,4,5,10,11,23,25") (const_string "x64") + (eq_attr "alternative" "19,20") + (const_string "x64_sse2") + (eq_attr "alternative" "21,22") + (const_string "sse2") ] (const_string "*"))) (set (attr "type") @@ -2318,7 +2191,7 @@ (eq_attr "alternative" "12,13") (cond [(ior (match_operand 0 "ext_sse_reg_operand") (match_operand 1 "ext_sse_reg_operand")) - (const_string "XI") + (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") @@ -2334,6 +2207,13 @@ (const_string "V2SF") ] (const_string "DI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "10,17,19") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "11,18,20") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true"))) (set (attr "enabled") (cond [(eq_attr "alternative" "15") (if_then_else @@ -2351,7 +2231,7 @@ (define_split [(set (match_operand:<DWI> 0 "general_reg_operand") (match_operand:<DWI> 1 "sse_reg_operand"))] - "TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC + "TARGET_SSE4_1 && reload_completed" [(set (match_dup 2) (vec_select:DWIH @@ -2375,7 +2255,7 @@ (define_split [(set (match_operand:DI 0 "sse_reg_operand") (match_operand:DI 1 "general_reg_operand"))] - "!TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC + "!TARGET_64BIT && TARGET_SSE4_1 && reload_completed" [(set (match_dup 2) (vec_merge:V4SI @@ -2414,15 +2294,15 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m 
,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?*Yi,*k,*k ,*rm") + "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,r ,*r,*km,*k"))] + "g ,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,*v,r ,*r,*km,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_MSKMOV: return "kmovd\t{%1, %0|%0, %1}"; @@ -2477,7 +2357,12 @@ gcc_unreachable (); } } - [(set (attr "type") + [(set (attr "isa") + (cond [(eq_attr "alternative" "12,13") + (const_string "sse2") + ] + (const_string "*"))) + (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "mmx") (eq_attr "alternative" "3,4,5,6,7") @@ -2522,7 +2407,14 @@ (not (match_test "TARGET_SSE2"))) (const_string "SF") ] - (const_string "SI")))]) + (const_string "SI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "6,12") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "7,13") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) (define_insn "*movhi_internal" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m") @@ -2841,7 +2733,6 @@ "reload_completed" "xor{<imodesuffix>}\t%0, %0" [(set_attr "type" "alu1") - (set_attr "modrm_class" "op0") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "0")]) @@ -3293,7 +3184,7 @@ switch (get_attr_type (insn)) { case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: /* Handle misaligned load/store since we @@ -3434,9 +3325,9 @@ ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7. 
(define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi,r ,o ,r ,m") + "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m") (match_operand:DF 1 "general_operand" - "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r ,roF,rF,rmF,rC"))] + "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (lra_in_progress || reload_completed || !CONST_DOUBLE_P (operands[1]) @@ -3470,14 +3361,14 @@ return "mov{q}\t{%1, %0|%0, %1}"; case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: switch (get_attr_mode (insn)) { case MODE_DF: if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) - return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + return "vmovsd\t{%d1, %0|%0, %d1}"; return "%vmovsd\t{%1, %0|%0, %1}"; case MODE_V4SF: @@ -3512,10 +3403,12 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "3,4,5,6,7,22,23") (const_string "nox64") - (eq_attr "alternative" "8,9,10,11,20,21,24,25") + (eq_attr "alternative" "8,9,10,11,24,25") (const_string "x64") (eq_attr "alternative" "12,13,14,15") (const_string "sse2") + (eq_attr "alternative" "20,21") + (const_string "x64_sse2") ] (const_string "*"))) (set (attr "type") @@ -3616,7 +3509,12 @@ (symbol_ref "true"))) (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "3,4") - (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")] + (symbol_ref "TARGET_INTEGER_DFMODE_MOVES") + (eq_attr "alternative" "20") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "21") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] (symbol_ref "true"))) (set (attr "enabled") (cond [(eq_attr "alternative" "22,23,24,25") @@ -3638,9 +3536,9 @@ (define_insn "*movsf_internal" [(set (match_operand:SF 0 "nonimmediate_operand" - 
"=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym,r ,m") + "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m") (match_operand:SF 1 "general_operand" - "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r ,rmF,rF"))] + "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (lra_in_progress || reload_completed || !CONST_DOUBLE_P (operands[1]) @@ -3664,14 +3562,14 @@ return "mov{l}\t{%1, %0|%0, %1}"; case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands[1]); + return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: switch (get_attr_mode (insn)) { case MODE_SF: if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) - return "vmovss\t{%1, %0, %0|%0, %0, %1}"; + return "vmovss\t{%d1, %0|%0, %d1}"; return "%vmovss\t{%1, %0|%0, %1}"; case MODE_V16SF: @@ -3702,7 +3600,12 @@ gcc_unreachable (); } } - [(set (attr "type") + [(set (attr "isa") + (cond [(eq_attr "alternative" "14,15") + (const_string "sse2") + ] + (const_string "*"))) + (set (attr "type") (cond [(eq_attr "alternative" "0,1,2") (const_string "fmov") (eq_attr "alternative" "3,4,16,17") @@ -3764,6 +3667,13 @@ (const_string "SF")) ] (const_string "SF"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "9,14") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "10,15") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true"))) (set (attr "enabled") (cond [(eq_attr "alternative" "16,17") (if_then_else @@ -3777,7 +3687,7 @@ (define_split [(set (match_operand 0 "any_fp_register_operand") - (match_operand 1 "nonimmediate_operand"))] + (match_operand 1 "memory_operand"))] "reload_completed && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode @@ -3789,7 +3699,7 @@ (define_split [(set (match_operand 0 "any_fp_register_operand") - (float_extend (match_operand 1 "nonimmediate_operand")))] + (float_extend (match_operand 1 
"memory_operand")))] "reload_completed && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode @@ -3815,7 +3725,7 @@ operands[1] = CONST1_RTX (<MODE>mode); }) -(define_insn "swapxf" +(define_insn "*swapxf" [(set (match_operand:XF 0 "register_operand" "+f") (match_operand:XF 1 "register_operand" "+f")) (set (match_dup 1) @@ -3829,22 +3739,8 @@ } [(set_attr "type" "fxch") (set_attr "mode" "XF")]) - -(define_insn "*swap<mode>" - [(set (match_operand:MODEF 0 "fp_register_operand" "+f") - (match_operand:MODEF 1 "fp_register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_80387 || reload_completed" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "<MODE>")]) + ;; Zero extension instructions (define_expand "zero_extendsidi2" @@ -3853,10 +3749,10 @@ (define_insn "*zero_extendsidi2" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,?r,?o,r ,o,?*Ym,?!*y,?r ,?*Yi,*x,*x,*v,*r") + "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r") (zero_extend:DI (match_operand:SI 1 "x86_64_zext_operand" - "0 ,rm,r ,rmWz,0,r ,m ,*Yj,r ,m ,*x,*v,*k")))] + "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k")))] "" { switch (get_attr_type (insn)) @@ -3900,7 +3796,7 @@ (const_string "nox64") (eq_attr "alternative" "3") (const_string "x64") - (eq_attr "alternative" "9") + (eq_attr "alternative" "7,8,9") (const_string "sse2") (eq_attr "alternative" "10") (const_string "sse4") @@ -3946,7 +3842,14 @@ (eq_attr "alternative" "8,10,11") (const_string "TI") ] - (const_string "SI")))]) + (const_string "SI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "7") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "5,8") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) (define_split [(set (match_operand:DI 0 "memory_operand") @@ -3972,15 +3875,6 @@ (set (match_dup 4) (const_int 0))] "split_double_mode (DImode, &operands[0], 1, 
&operands[3], &operands[4]);") -(define_peephole2 - [(set (match_operand:DI 0 "general_reg_operand") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand"))) - (set (match_operand:DI 2 "sse_reg_operand") (match_dup 0))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) - (zero_extend:DI (match_dup 1)))]) - (define_mode_attr kmov_isa [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")]) @@ -4423,6 +4317,40 @@ } }) +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "SF,XF,DF") + (set (attr "enabled") + (if_then_else + (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "true") + (symbol_ref "false"))))]) + /* For converting SF(xmm2) to DF(xmm1), use the following code instead of cvtss2sd: unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs @@ -4490,39 +4418,31 @@ (set (match_dup 0) (float_extend:DF (match_dup 2)))] "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") -(define_insn "*extendsfdf2" - [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") +;; Break partial reg stall for cvtss2sd. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. 
+ +(define_split + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" -{ - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,ssecvt") - (set_attr "prefix" "orig,orig,maybe_vex") - (set_attr "mode" "SF,XF,DF") - (set (attr "enabled") - (if_then_else - (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) - (if_then_else - (eq_attr "alternative" "0,1") - (symbol_ref "TARGET_MIX_SSE_I387") - (symbol_ref "true")) - (if_then_else - (eq_attr "alternative" "0,1") - (symbol_ref "true") - (symbol_ref "false"))))]) + (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && optimize_function_for_speed_p (cfun) + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) + && (!EXT_REX_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF + (float_extend:DF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); + emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); +}) (define_expand "extend<mode>xf2" [(set (match_operand:XF 0 "nonimmediate_operand") @@ -4561,23 +4481,43 @@ ;; Conversion from DFmode to SFmode. 
-(define_expand "truncdfsf2" - [(set (match_operand:SF 0 "nonimmediate_operand") +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand")))] + (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { - if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) - ; - else if (flag_unsafe_math_optimizations) - ; - else - { - rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP); - emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); - DONE; - } -}) + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "mode" "SF") + (set (attr "enabled") + (if_then_else + (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) + (cond [(eq_attr "alternative" "0") + (symbol_ref "TARGET_MIX_SSE_I387") + (eq_attr "alternative" "1") + (symbol_ref "TARGET_MIX_SSE_I387 + && flag_unsafe_math_optimizations") + ] + (symbol_ref "true")) + (cond [(eq_attr "alternative" "0") + (symbol_ref "true") + (eq_attr "alternative" "1") + (symbol_ref "flag_unsafe_math_optimizations") + ] + (symbol_ref "false"))))]) /* For converting DF(xmm2) to SF(xmm1), use the following code instead of cvtsd2ss: @@ -4588,7 +4528,7 @@ anyway. */ (define_split [(set (match_operand:SF 0 "sse_reg_operand") - (float_truncate:SF + (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () @@ -4625,7 +4565,7 @@ CONST0_RTX (DFmode))); }) -;; It's more profitable to split and then extend in the same register. +;; It's more profitable to split and then truncate in the same register. 
(define_peephole2 [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF @@ -4636,197 +4576,47 @@ (set (match_dup 0) (float_truncate:SF (match_dup 2)))] "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") -(define_expand "truncdfsf2_with_temp" - [(parallel [(set (match_operand:SF 0) - (float_truncate:SF (match_operand:DF 1))) - (clobber (match_operand:SF 2))])]) - -;; SSE alternative doesn't depend on flag_unsafe_math_optimizations, -;; because nothing we do there is unsafe. -(define_insn "*truncdfsf_fast_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,v") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f ,vm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - case 1: - return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,ssecvt") - (set_attr "prefix" "orig,maybe_vex") - (set_attr "mode" "SF") - (set (attr "enabled") - (cond [(eq_attr "alternative" "0") - (symbol_ref "TARGET_MIX_SSE_I387 - && flag_unsafe_math_optimizations") - ] - (symbol_ref "true")))]) - -(define_insn "*truncdfsf_fast_i387" - [(set (match_operand:SF 0 "nonimmediate_operand" "=fm") +;; Break partial reg stall for cvtsd2ss. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. 
+ +(define_split + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f")))] - "TARGET_80387 && flag_unsafe_math_optimizations" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_insn "*truncdfsf_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,v ,?f,?v,?*r") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f ,vm,f ,f ,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))] - "TARGET_MIX_SSE_I387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - case 1: - return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; - - default: - return "#"; - } -} - [(set_attr "isa" "*,sse2,*,*,*") - (set_attr "type" "fmov,ssecvt,multi,multi,multi") - (set_attr "unit" "*,*,i387,i387,i387") - (set_attr "prefix" "orig,maybe_vex,orig,orig,orig") - (set_attr "mode" "SF")]) - -(define_insn "*truncdfsf_i387" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?v,?*r") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - default: - return "#"; - } -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") - (set_attr "mode" "SF")]) - -(define_insn "*truncdfsf2_i387_1" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:DF 1 "register_operand" "f")))] - "TARGET_80387 - && !(TARGET_SSE2 && TARGET_SSE_MATH) - && !TARGET_MIX_SSE_I387" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_split - [(set (match_operand:SF 0 "register_operand") - (float_truncate:SF - (match_operand:DF 1 "fp_register_operand"))) - (clobber (match_operand 2))] - "reload_completed" - 
[(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));") + (match_operand:DF 1 "nonimmediate_operand")))] + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && optimize_function_for_speed_p (cfun) + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) + && (!EXT_REX_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:SF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); + emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); +}) ;; Conversion from XFmode to {SF,DF}mode -(define_expand "truncxf<mode>2" - [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand") - (float_truncate:MODEF - (match_operand:XF 1 "register_operand"))) - (clobber (match_dup 2))])] - "TARGET_80387" -{ - if (flag_unsafe_math_optimizations) - { - rtx reg = REG_P (operands[0]) ? 
operands[0] : gen_reg_rtx (<MODE>mode); - emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1])); - if (reg != operands[0]) - emit_move_insn (operands[0], reg); - DONE; - } - else - operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); -}) - -(define_insn "*truncxfsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?v,?*r") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] - "TARGET_80387" -{ - gcc_assert (!which_alternative); - return output_387_reg_move (insn, operands); -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") - (set_attr "mode" "SF")]) - -(define_insn "*truncxfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?v,?*r") - (float_truncate:DF - (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))] - "TARGET_80387" -{ - gcc_assert (!which_alternative); - return output_387_reg_move (insn, operands); -} - [(set_attr "isa" "*,*,sse2,*") - (set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") - (set_attr "mode" "DF")]) - -(define_insn "truncxf<mode>2_i387_noop" - [(set (match_operand:MODEF 0 "register_operand" "=f") +(define_insn "truncxf<mode>2" + [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f") (float_truncate:MODEF - (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387 && flag_unsafe_math_optimizations" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "<MODE>")]) - -(define_insn "*truncxf<mode>2_i387" - [(set (match_operand:MODEF 0 "memory_operand" "=m") - (float_truncate:MODEF - (match_operand:XF 1 "register_operand" "f")))] + (match_operand:XF 1 "register_operand" "f,f")))] "TARGET_80387" "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "<MODE>")]) - -(define_split - [(set 
(match_operand:MODEF 0 "register_operand") - (float_truncate:MODEF - (match_operand:XF 1 "register_operand"))) - (clobber (match_operand:MODEF 2 "memory_operand"))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) - (set (match_dup 0) (match_dup 2))]) - -(define_split - [(set (match_operand:MODEF 0 "memory_operand") - (float_truncate:MODEF - (match_operand:XF 1 "register_operand"))) - (clobber (match_operand:MODEF 2 "memory_operand"))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]) + (set_attr "mode" "<MODE>") + (set (attr "enabled") + (cond [(eq_attr "alternative" "1") + (symbol_ref "flag_unsafe_math_optimizations") + ] + (symbol_ref "true")))]) ;; Signed conversion to DImode. @@ -4838,7 +4628,7 @@ { if (TARGET_FISTTP) { - emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1])); DONE; } }) @@ -4852,7 +4642,7 @@ if (TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) { - emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1])); DONE; } if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)) @@ -4875,7 +4665,7 @@ { if (TARGET_FISTTP) { - emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1])); DONE; } }) @@ -4889,7 +4679,7 @@ if (TARGET_FISTTP && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) { - emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1])); DONE; } if (SSE_FLOAT_MODE_P (<MODE>mode)) @@ -4913,11 +4703,23 @@ { if (TARGET_FISTTP) { - emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1])); DONE; } }) +;; Unsigned conversion to DImode + 
+(define_insn "fixuns_trunc<mode>di2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unsigned_fix:DI + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "DI")]) + ;; Unsigned conversion to SImode. (define_expand "fixuns_trunc<mode>si2" @@ -4928,13 +4730,19 @@ (use (match_dup 2)) (clobber (match_scratch:<ssevecmode> 3)) (clobber (match_scratch:<ssevecmode> 4))])] - "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" + "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH" { machine_mode mode = <MODE>mode; machine_mode vecmode = <ssevecmode>mode; REAL_VALUE_TYPE TWO31r; rtx two31; + if (TARGET_AVX512F) + { + emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1])); + DONE; + } + if (optimize_insn_for_size_p ()) FAIL; @@ -4944,6 +4752,27 @@ operands[2] = force_reg (vecmode, two31); }) +(define_insn "fixuns_trunc<mode>si2_avx512f" + [(set (match_operand:SI 0 "register_operand" "=r") + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + +(define_insn "*fixuns_trunc<mode>si2_avx512f_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))] + "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*fixuns_trunc<mode>_1" [(set (match_operand:SI 0 "register_operand" "=&x,&x") (unsigned_fix:SI @@ -5003,37 +4832,10 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (fix:SWI48 (match_dup 2)))]) 
-(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1" - [(set (match_operand:SWI248x 0 "nonimmediate_operand") - (fix:SWI248x (match_operand 1 "register_operand")))] - "X87_FLOAT_MODE_P (GET_MODE (operands[1])) - && TARGET_FISTTP - && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && (TARGET_64BIT || <MODE>mode != DImode)) - && TARGET_SSE_MATH) - && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1])); - else - { - operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); - emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0], - operands[1], - operands[2])); - } - DONE; -} - [(set_attr "type" "fisttp") - (set_attr "mode" "<MODE>")]) - (define_insn "fix_trunc<mode>_i387_fisttp" - [(set (match_operand:SWI248x 0 "memory_operand" "=m") + [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m") (fix:SWI248x (match_operand 1 "register_operand" "f"))) - (clobber (match_scratch:XF 2 "=&1f"))] + (clobber (match_scratch:XF 2 "=&f"))] "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) @@ -5043,39 +4845,6 @@ [(set_attr "type" "fisttp") (set_attr "mode" "<MODE>")]) -(define_insn "fix_trunc<mode>_i387_fisttp_with_temp" - [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m,?r") - (fix:SWI248x (match_operand 1 "register_operand" "f,f"))) - (clobber (match_operand:SWI248x 2 "memory_operand" "=X,m")) - (clobber (match_scratch:XF 3 "=&1f,&1f"))] - "X87_FLOAT_MODE_P (GET_MODE (operands[1])) - && TARGET_FISTTP - && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && (TARGET_64BIT || <MODE>mode != DImode)) - && TARGET_SSE_MATH)" - "#" - [(set_attr "type" "fisttp") - (set_attr "mode" "<MODE>")]) - -(define_split - [(set (match_operand:SWI248x 0 "register_operand") - (fix:SWI248x (match_operand 1 "register_operand"))) - (clobber (match_operand:SWI248x 2 "memory_operand")) - (clobber 
(match_scratch 3))] - "reload_completed" - [(parallel [(set (match_dup 2) (fix:SWI248x (match_dup 1))) - (clobber (match_dup 3))]) - (set (match_dup 0) (match_dup 2))]) - -(define_split - [(set (match_operand:SWI248x 0 "memory_operand") - (fix:SWI248x (match_operand 1 "register_operand"))) - (clobber (match_operand:SWI248x 2 "memory_operand")) - (clobber (match_scratch 3))] - "reload_completed" - [(parallel [(set (match_dup 0) (fix:SWI248x (match_dup 1))) - (clobber (match_dup 3))])]) - ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description ;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control ;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG @@ -5098,16 +4867,9 @@ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); - emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } + + emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1], + operands[2], operands[3])); DONE; } [(set_attr "type" "fistp") @@ -5115,11 +4877,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "fix_truncdi_i387" - [(set (match_operand:DI 0 "memory_operand" "=m") + [(set (match_operand:DI 0 "nonimmediate_operand" "=m") (fix:DI (match_operand 1 "register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m")) - (clobber (match_scratch:XF 4 "=&1f"))] + (clobber (match_scratch:XF 4 "=&f"))] "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" @@ -5128,50 +4890,8 @@ (set_attr "i387_cw" "trunc") (set_attr "mode" "DI")]) -(define_insn 
"fix_truncdi_i387_with_temp" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=X,m")) - (clobber (match_scratch:XF 5 "=&1f,&1f"))] - "X87_FLOAT_MODE_P (GET_MODE (operands[1])) - && !TARGET_FISTTP - && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "DI")]) - -(define_split - [(set (match_operand:DI 0 "register_operand") - (fix:DI (match_operand 1 "register_operand"))) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:DI 4 "memory_operand")) - (clobber (match_scratch 5))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))]) - -(define_split - [(set (match_operand:DI 0 "memory_operand") - (fix:DI (match_operand 1 "register_operand"))) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:DI 4 "memory_operand")) - (clobber (match_scratch 5))] - "reload_completed" - [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))])]) - (define_insn "fix_trunc<mode>_i387" - [(set (match_operand:SWI24 0 "memory_operand" "=m") + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m") (fix:SWI24 (match_operand 1 "register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] @@ -5183,46 +4903,9 @@ (set_attr "i387_cw" "trunc") (set_attr "mode" "<MODE>")]) -(define_insn "fix_trunc<mode>_i387_with_temp" - [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r") - (fix:SWI24 
(match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))] - "X87_FLOAT_MODE_P (GET_MODE (operands[1])) - && !TARGET_FISTTP - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "<MODE>")]) - -(define_split - [(set (match_operand:SWI24 0 "register_operand") - (fix:SWI24 (match_operand 1 "register_operand"))) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:SWI24 4 "memory_operand"))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:SWI24 (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))]) - -(define_split - [(set (match_operand:SWI24 0 "memory_operand") - (fix:SWI24 (match_operand 1 "register_operand"))) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:SWI24 4 "memory_operand"))] - "reload_completed" - [(parallel [(set (match_dup 0) (fix:SWI24 (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))])]) - (define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 "memory_operand" "=m") - (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))] + (unspec:HI [(const_int 0)] UNSPEC_FSTCW))] "TARGET_80387" "fnstcw\t%0" [(set (attr "length") @@ -5230,19 +4913,6 @@ (set_attr "mode" "HI") (set_attr "unit" "i387") (set_attr "bdver1_decode" "vector")]) - -(define_insn "x86_fldcw_1" - [(set (reg:HI FPCR_REG) - (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] - "TARGET_80387" - "fldcw\t%0" - [(set (attr "length") - (symbol_ref "ix86_attr_length_address_default (insn) + 2")) - (set_attr "mode" "HI") - (set_attr "unit" "i387") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector") - (set_attr "bdver1_decode" "vector")]) ;; 
Conversion between fixed point and floating point. @@ -5272,36 +4942,19 @@ (set_attr "znver1_decode" "double") (set_attr "fp_int_src" "true")]) -(define_expand "float<SWI48:mode><MODEF:mode>2" +(define_expand "float<SWI48x:mode><MODEF:mode>2" [(set (match_operand:MODEF 0 "register_operand") - (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] - "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)" -{ - if (!(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) - && !X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode)) - { - rtx reg = gen_reg_rtx (XFmode); - rtx (*insn)(rtx, rtx); - - emit_insn (gen_float<SWI48:mode>xf2 (reg, operands[1])); - - if (<MODEF:MODE>mode == SFmode) - insn = gen_truncxfsf2; - else if (<MODEF:MODE>mode == DFmode) - insn = gen_truncxfdf2; - else - gcc_unreachable (); - - emit_insn (insn (operands[0], reg)); - DONE; - } -}) - -(define_insn "*float<SWI48:mode><MODEF:mode>2_mixed" - [(set (match_operand:MODEF 0 "register_operand" "=f,Yc,v") + (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH + && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))") + +(define_insn "*float<SWI48:mode><MODEF:mode>2" + [(set (match_operand:MODEF 0 "register_operand" "=f,v,v") (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))] - "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH" + "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)" "@ fild%Z1\t%1 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1} @@ -5322,17 +4975,28 @@ (set_attr "znver1_decode" "double,*,*") (set_attr "fp_int_src" "true") (set (attr "enabled") - (cond [(eq_attr "alternative" "0") - (symbol_ref "TARGET_MIX_SSE_I387 - && X87_ENABLE_FLOAT (<MODEF:MODE>mode, - <SWI48:MODE>mode)") - ] - 
(symbol_ref "true")))]) - -(define_insn "*float<SWI48x:mode><MODEF:mode>2_i387" + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "TARGET_MIX_SSE_I387 + && X87_ENABLE_FLOAT (<MODEF:MODE>mode, + <SWI48:MODE>mode)") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "true") + (symbol_ref "false")))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")] + (symbol_ref "true")))]) + +(define_insn "*floatdi<MODEF:mode>2_i387" [(set (match_operand:MODEF 0 "register_operand" "=f") - (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))] - "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)" + (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))] + "!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)" "fild%Z1\t%1" [(set_attr "type" "fmov") (set_attr "mode" "<MODEF:MODE>") @@ -5367,6 +5031,56 @@ DONE; }) +;; Avoid store forwarding (partial memory) stall penalty +;; by passing DImode value through XMM registers. 
*/ + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand") + (float:X87MODEF + (match_operand:DI 1 "register_operand")))] + "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC + && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) + && TARGET_SSE2 && optimize_function_for_speed_p (cfun) + && can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_floatdi<mode>2_i387_with_xmm + (operands[0], operands[1], + assign_386_stack_local (DImode, SLOT_TEMP))); + DONE; +}) + +(define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:DI 1 "register_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=x")) + (clobber (match_scratch:V4SI 4 "=x")) + (clobber (match_operand:DI 2 "memory_operand" "=m"))] + "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC + && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) + && TARGET_SSE2 && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] +{ + /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). + Assemble the 64-bit DImode value in an xmm register. */ + emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), + gen_lowpart (SImode, operands[1]))); + emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], + operands[4])); + + operands[3] = gen_lowpart (DImode, operands[3]); +} + [(set_attr "type" "multi") + (set_attr "mode" "<X87MODEF:MODE>") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true")]) + ;; Avoid partial SSE register dependency stalls. 
This splitter should split ;; late in the pass sequence (after register rename pass), so allocated ;; registers won't change anymore @@ -5392,114 +5106,6 @@ emit_move_insn (operands[0], CONST0_RTX (vmode)); }) -;; Break partial reg stall for cvtsd2ss. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. - -(define_split - [(set (match_operand:SF 0 "sse_reg_operand") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed - && optimize_function_for_speed_p (cfun) - && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) - && (!EXT_REX_SSE_REG_P (operands[0]) - || TARGET_AVX512VL)" - [(set (match_dup 0) - (vec_merge:V4SF - (vec_duplicate:V4SF - (float_truncate:SF - (match_dup 1))) - (match_dup 0) - (const_int 1)))] -{ - operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); - emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); -}) - -;; Break partial reg stall for cvtss2sd. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. - -(define_split - [(set (match_operand:DF 0 "sse_reg_operand") - (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand")))] - "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed - && optimize_function_for_speed_p (cfun) - && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) - && (!EXT_REX_SSE_REG_P (operands[0]) - || TARGET_AVX512VL)" - [(set (match_dup 0) - (vec_merge:V2DF - (vec_duplicate:V2DF - (float_extend:DF - (match_dup 1))) - (match_dup 0) - (const_int 1)))] -{ - operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); - emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); -}) - -;; Avoid store forwarding (partial memory) stall penalty -;; by passing DImode value through XMM registers. 
*/ - -(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm" - [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") - (float:X87MODEF - (match_operand:DI 1 "nonimmediate_operand" "m,?r"))) - (clobber (match_scratch:V4SI 3 "=X,x")) - (clobber (match_scratch:V4SI 4 "=X,x")) - (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] - "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" - "#" - [(set_attr "type" "multi") - (set_attr "mode" "<X87MODEF:MODE>") - (set_attr "unit" "i387") - (set_attr "fp_int_src" "true")]) - -(define_split - [(set (match_operand:X87MODEF 0 "fp_register_operand") - (float:X87MODEF (match_operand:DI 1 "register_operand"))) - (clobber (match_scratch:V4SI 3)) - (clobber (match_scratch:V4SI 4)) - (clobber (match_operand:DI 2 "memory_operand"))] - "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun) - && reload_completed" - [(set (match_dup 2) (match_dup 3)) - (set (match_dup 0) (float:X87MODEF (match_dup 2)))] -{ - /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). - Assemble the 64-bit DImode value in an xmm register. 
*/ - emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), - gen_lowpart (SImode, operands[1]))); - emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), - gen_highpart (SImode, operands[1]))); - emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], - operands[4])); - - operands[3] = gen_lowpart (DImode, operands[3]); -}) - -(define_split - [(set (match_operand:X87MODEF 0 "fp_register_operand") - (float:X87MODEF (match_operand:DI 1 "memory_operand"))) - (clobber (match_scratch:V4SI 3)) - (clobber (match_scratch:V4SI 4)) - (clobber (match_operand:DI 2 "memory_operand"))] - "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun) - && reload_completed" - [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) - (define_expand "floatuns<SWI12:mode><MODEF:mode>2" [(set (match_operand:MODEF 0 "register_operand") (unsigned_float:MODEF @@ -5512,16 +5118,26 @@ DONE; }) +(define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512" + [(set (match_operand:MODEF 0 "register_operand" "=v") + (unsigned_float:MODEF + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODEF:MODE>")]) + ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two ;; SImode values to stack. Also note that fild loads from memory only. 
-(define_insn_and_split "*floatunssi<mode>2_i387_with_xmm" +(define_insn_and_split "floatunssi<mode>2_i387_with_xmm" [(set (match_operand:X87MODEF 0 "register_operand" "=f") (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:DI 3 "=x")) - (clobber (match_operand:DI 2 "memory_operand" "=m"))] + (clobber (match_operand:DI 2 "memory_operand" "=m")) + (clobber (match_scratch:DI 3 "=x"))] "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" @@ -5536,43 +5152,59 @@ (set_attr "mode" "<MODE>")]) (define_expand "floatunssi<mode>2" - [(parallel - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "nonimmediate_operand"))) - (clobber (match_scratch:DI 3)) - (clobber (match_dup 2))])] - "!TARGET_64BIT - && ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" -{ - if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + [(set (match_operand:X87MODEF 0 "register_operand") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand")))] + "(!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) + || ((!TARGET_64BIT || TARGET_AVX512F) + && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" +{ + if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_floatunssi<mode>2_i387_with_xmm + (operands[0], operands[1], + assign_386_stack_local (DImode, SLOT_TEMP))); + DONE; + } + if (!TARGET_AVX512F) { ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); DONE; } - else - operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); }) (define_expand "floatunsdisf2" - [(use (match_operand:SF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] + [(set 
(match_operand:SF 0 "register_operand") + (unsigned_float:SF + (match_operand:DI 1 "nonimmediate_operand")))] "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH" - "x86_emit_floatuns (operands); DONE;") +{ + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } +}) (define_expand "floatunsdidf2" - [(use (match_operand:DF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] - "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + [(set (match_operand:DF 0 "register_operand") + (unsigned_float:DF + (match_operand:DI 1 "nonimmediate_operand")))] + "(TARGET_KEEPS_VECTOR_ALIGNED_STACK || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH" { - if (TARGET_64BIT) - x86_emit_floatuns (operands); - else - ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); - DONE; + if (!TARGET_64BIT) + { + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); + DONE; + } + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } }) ;; Load effective address instructions @@ -6823,6 +6455,20 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_insn "*add<mode>3_carry_0" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (plus:SWI + (match_operator:SWI 3 "ix86_carry_flag_operator" + [(match_operand 2 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)" + "adc{<imodesuffix>}\t{$0, %0|%0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -6839,6 +6485,20 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) +(define_insn "*addsi3_carry_zext_0" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + 
(match_operand:SI 1 "register_operand" "0")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "adc{l}\t{$0, %k0|%k0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + ;; There is no point to generate ADCX instruction. ADC is shorter and faster. (define_insn "addcarry<mode>" @@ -6895,6 +6555,20 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_insn "*sub<mode>3_carry_0" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operator:SWI 3 "ix86_carry_flag_operator" + [(match_operand 2 "flags_reg_operand") (const_int 0)]))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)" + "sbb{<imodesuffix>}\t{$0, %0|%0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*subsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -6912,6 +6586,21 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) +(define_insn "*subsi3_carry_zext_0" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI + (match_operand:SI 1 "register_operand" "0") + (match_operator:SI 2 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)])))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "sbb{l}\t{$0, %k0|%k0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + (define_insn "sub<mode>3_carry_ccc" [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -7577,16 +7266,16 @@ (set_attr "mode" "QI")]) (define_expand "<s>mul<mode>3_highpart" - [(parallel [(set (match_operand:SWI48 0 "register_operand") - (truncate:SWI48 + [(parallel [(set (match_operand:DWIH 0 "register_operand") + (truncate:DWIH (lshiftrt:<DWI> (mult:<DWI> (any_extend:<DWI> - (match_operand:SWI48 1 "nonimmediate_operand")) + 
(match_operand:DWIH 1 "nonimmediate_operand")) (any_extend:<DWI> - (match_operand:SWI48 2 "register_operand"))) + (match_operand:DWIH 2 "register_operand"))) (match_dup 3)))) - (clobber (match_scratch:SWI48 4)) + (clobber (match_scratch:DWIH 4)) (clobber (reg:CC FLAGS_REG))])] "" "operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));") @@ -9225,14 +8914,14 @@ }) (define_insn "*andndi3_doubleword" - [(set (match_operand:DI 0 "register_operand" "=r,&r") + [(set (match_operand:DI 0 "register_operand" "=&r,r,r,&r") (and:DI - (not:DI (match_operand:DI 1 "register_operand" "r,0")) - (match_operand:DI 2 "nonimmediate_operand" "rm,rm"))) + (not:DI (match_operand:DI 1 "register_operand" "r,0,r,0")) + (match_operand:DI 2 "nonimmediate_operand" "rm,rm,0,rm"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STV && TARGET_SSE2" "#" - [(set_attr "isa" "bmi,*")]) + [(set_attr "isa" "bmi,bmi,bmi,*")]) (define_split [(set (match_operand:DI 0 "register_operand") @@ -9983,36 +9672,9 @@ "TARGET_80387 && (reload_completed || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" - "f<absneg_mnemonic>" - [(set_attr "type" "fsgn") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<code>extendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (absneg:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" - "f<absneg_mnemonic>" + "<absneg_mnemonic>" [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*<code>extendsfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (absneg:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "f<absneg_mnemonic>" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - -(define_insn "*<code>extenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (absneg:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "f<absneg_mnemonic>" - [(set_attr "type" "fsgn") - 
(set_attr "mode" "XF")]) + (set_attr "mode" "<MODE>")]) ;; Copysign instructions @@ -10030,7 +9692,7 @@ (define_insn_and_split "copysign<mode>3_const" [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv") (unspec:CSGNMODE - [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "YvmC") + [(match_operand:<CSGNVMODE> 1 "nonimm_or_0_operand" "YvmC") (match_operand:CSGNMODE 2 "register_operand" "0") (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "Yvm")] UNSPEC_COPYSIGN))] @@ -10209,6 +9871,87 @@ "" "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;") +(define_insn_and_split "*ashl<dwi>3_doubleword_mask" + [(set (match_operand:<DWI> 0 "register_operand") + (ashift:<DWI> + (match_operand:<DWI> 1 "register_operand") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "c") + (match_operand:SI 3 "const_int_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 6) + (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2)) + (lshiftrt:DWIH (match_dup 5) + (minus:QI (match_dup 8) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 4) + (ashift:DWIH (match_dup 5) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]); + + operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT); + + if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + != ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + { + rtx tem = gen_reg_rtx (SImode); + emit_insn (gen_andsi3 (tem, operands[2], operands[3])); + operands[2] = tem; + } + + operands[2] = gen_lowpart (QImode, operands[2]); + + if (!rtx_equal_p (operands[6], operands[7])) + emit_move_insn (operands[6], operands[7]); +}) + +(define_insn_and_split "*ashl<dwi>3_doubleword_mask_1" + [(set (match_operand:<DWI> 0 "register_operand") + (ashift:<DWI> + (match_operand:<DWI> 1 "register_operand") 
+ (and:QI + (match_operand:QI 2 "register_operand" "c") + (match_operand:QI 3 "const_int_operand")))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 6) + (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2)) + (lshiftrt:DWIH (match_dup 5) + (minus:QI (match_dup 8) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 4) + (ashift:DWIH (match_dup 5) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]); + + operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT); + + if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + != ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + { + rtx tem = gen_reg_rtx (QImode); + emit_insn (gen_andqi3 (tem, operands[2], operands[3])); + operands[2] = tem; + } + + if (!rtx_equal_p (operands[6], operands[7])) + emit_move_insn (operands[6], operands[7]); +}) + (define_insn "*ashl<mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=&r") (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n") @@ -10328,7 +10071,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) @@ -10342,14 +10085,15 @@ (ashift:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn_and_split "*ashl<mode>3_mask_1" [(set (match_operand:SWI48 0 "nonimmediate_operand") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") (and:QI - (match_operand:QI 2 "register_operand") + (match_operand:QI 2 "register_operand" "c,r") 
(match_operand:QI 3 "const_int_operand")))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) @@ -10362,7 +10106,9 @@ [(set (match_dup 0) (ashift:SWI48 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "isa" "*,bmi2")]) (define_insn "*bmi2_ashl<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -10622,7 +10368,7 @@ { switch (get_attr_type (insn)) { - case TYPE_ALU: + case TYPE_ALU1: gcc_assert (operands[1] == const1_rtx); return "add{b}\t%0, %0"; @@ -10638,12 +10384,12 @@ (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 1 "const1_operand")) - (const_string "alu") + (const_string "alu1") ] (const_string "ishift1"))) (set (attr "length_immediate") (if_then_else - (ior (eq_attr "type" "alu") + (ior (eq_attr "type" "alu1") (and (eq_attr "type" "ishift1") (and (match_operand 1 "const1_operand") (ior (match_test "TARGET_SHIFT1") @@ -10848,7 +10594,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -10862,14 +10608,15 @@ (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn_and_split "*<shift_insn><mode>3_mask_1" [(set (match_operand:SWI48 0 "nonimmediate_operand") (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") (and:QI - (match_operand:QI 2 "register_operand") + (match_operand:QI 2 "register_operand" "c,r") (match_operand:QI 3 "const_int_operand")))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -10882,7 +10629,90 @@ 
[(set (match_dup 0) (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "isa" "*,bmi2")]) + +(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask" + [(set (match_operand:<DWI> 0 "register_operand") + (any_shiftrt:<DWI> + (match_operand:<DWI> 1 "register_operand") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "c") + (match_operand:SI 3 "const_int_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 4) + (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2)) + (ashift:DWIH (match_dup 7) + (minus:QI (match_dup 8) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 6) + (any_shiftrt:DWIH (match_dup 7) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]); + + operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT); + + if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + != ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + { + rtx tem = gen_reg_rtx (SImode); + emit_insn (gen_andsi3 (tem, operands[2], operands[3])); + operands[2] = tem; + } + + operands[2] = gen_lowpart (QImode, operands[2]); + + if (!rtx_equal_p (operands[4], operands[5])) + emit_move_insn (operands[4], operands[5]); +}) + +(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask_1" + [(set (match_operand:<DWI> 0 "register_operand") + (any_shiftrt:<DWI> + (match_operand:<DWI> 1 "register_operand") + (and:QI + (match_operand:QI 2 "register_operand" "c") + (match_operand:QI 3 "const_int_operand")))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 4) + (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2)) + (ashift:DWIH (match_dup 7) + 
(minus:QI (match_dup 8) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 6) + (any_shiftrt:DWIH (match_dup 7) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]); + + operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT); + + if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + != ((<MODE_SIZE> * BITS_PER_UNIT) - 1)) + { + rtx tem = gen_reg_rtx (QImode); + emit_insn (gen_andqi3 (tem, operands[2], operands[3])); + operands[2] = tem; + } + + if (!rtx_equal_p (operands[4], operands[5])) + emit_move_insn (operands[4], operands[5]); +}) (define_insn_and_split "*<shift_insn><mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=&r") @@ -11327,7 +11157,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -11348,7 +11178,7 @@ (any_rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") (and:QI - (match_operand:QI 2 "register_operand") + (match_operand:QI 2 "register_operand" "c") (match_operand:QI 3 "const_int_operand")))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -11612,7 +11442,7 @@ (set_attr "mode" "QI")]) (define_split - [(set (match_operand:HI 0 "register_operand") + [(set (match_operand:HI 0 "QIreg_operand") (any_rotate:HI (match_dup 0) (const_int 8))) (clobber (reg:CC FLAGS_REG))] "reload_completed @@ -12204,8 +12034,7 @@ (lt (minus (match_dup 0) (pc)) (const_int 128))) (const_int 2) - (const_int 6))) - (set_attr "maybe_prefix_bnd" "1")]) + (const_int 6)))]) ;; In general it is not safe to assume too much about CCmode registers, ;; so simplify-rtx stops when it sees a second one. 
Under certain @@ -12273,24 +12102,27 @@ (lt (minus (match_dup 0) (pc)) (const_int 128))) (const_int 2) - (const_int 5))) - (set_attr "maybe_prefix_bnd" "1")]) + (const_int 5)))]) (define_expand "indirect_jump" [(set (pc) (match_operand 0 "indirect_branch_operand"))] "" { - if (TARGET_X32) + if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; }) (define_insn "*indirect_jump" [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))] "" - "%!jmp\t%A0" - [(set_attr "type" "ibr") - (set_attr "length_immediate" "0") - (set_attr "maybe_prefix_bnd" "1")]) + "* return ix86_output_indirect_jmp (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") + (const_string "multi") + (const_string "ibr"))) + (set_attr "length_immediate" "0")]) (define_expand "tablejump" [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand")) @@ -12329,18 +12161,22 @@ OPTAB_DIRECT); } - if (TARGET_X32) + if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; }) (define_insn "*tablejump_1" [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw")) (use (label_ref (match_operand 1)))] "" - "%!jmp\t%A0" - [(set_attr "type" "ibr") - (set_attr "length_immediate" "0") - (set_attr "maybe_prefix_bnd" "1")]) + "* return ix86_output_indirect_jmp (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") + (const_string "multi") + (const_string "ibr"))) + (set_attr "length_immediate" "0")]) ;; Convert setcc + movzbl to xor + setcc if operands don't overlap. @@ -12375,6 +12211,7 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) + && ! 
reg_overlap_mentioned_p (operands[3], operands[4]) && ! reg_set_p (operands[3], operands[4]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) @@ -12400,6 +12237,7 @@ || operands_match_p (operands[2], operands[4])) && ! reg_overlap_mentioned_p (operands[4], operands[0]) && ! reg_overlap_mentioned_p (operands[4], operands[1]) + && ! reg_overlap_mentioned_p (operands[4], operands[5]) && ! reg_set_p (operands[4], operands[5]) && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) && peep2_regno_dead_p (0, FLAGS_REG)" @@ -12449,6 +12287,7 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) + && ! reg_overlap_mentioned_p (operands[3], operands[4]) && ! reg_set_p (operands[3], operands[4]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) @@ -12475,6 +12314,7 @@ || operands_match_p (operands[2], operands[4])) && ! reg_overlap_mentioned_p (operands[4], operands[0]) && ! reg_overlap_mentioned_p (operands[4], operands[1]) + && ! reg_overlap_mentioned_p (operands[4], operands[5]) && ! 
reg_set_p (operands[4], operands[5]) && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) && peep2_regno_dead_p (0, FLAGS_REG)" @@ -12554,7 +12394,10 @@ (match_operand:SI 0 "register_no_elim_operand" "U") (match_operand:SI 1 "GOT32_symbol_operand")))) (match_operand 2))] - "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "!TARGET_MACHO + && !TARGET_64BIT + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (insn)" { rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]); fnaddr = gen_const_mem (SImode, fnaddr); @@ -12573,7 +12416,7 @@ [(call (mem:QI (match_operand:W 0 "memory_operand" "m")) (match_operand 1)) (unspec [(const_int 0)] UNSPEC_PEEPSIB)] - "!TARGET_X32" + "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER" "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) @@ -12582,7 +12425,9 @@ (match_operand:W 1 "memory_operand")) (call (mem:QI (match_dup 0)) (match_operand 3))] - "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (1)) + "!TARGET_X32 + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))" [(parallel [(call (mem:QI (match_dup 1)) @@ -12595,7 +12440,9 @@ (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) (call (mem:QI (match_dup 0)) (match_operand 3))] - "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (2)) + "!TARGET_X32 + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))" [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) @@ -12617,7 +12464,7 @@ }) (define_insn "*call_pop" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lmBz")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz")) (match_operand 1)) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -12637,7 +12484,7 @@ [(set_attr "type" "call")]) (define_insn "*sibcall_pop_memory" - [(call (mem:QI 
(match_operand:SI 0 "memory_operand" "m")) + [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs")) (match_operand 1)) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -12691,7 +12538,9 @@ [(set (match_operand:W 0 "register_operand") (match_operand:W 1 "memory_operand")) (set (pc) (match_dup 0))] - "!TARGET_X32 && peep2_reg_dead_p (2, operands[0])" + "!TARGET_X32 + && !TARGET_INDIRECT_BRANCH_REGISTER + && peep2_reg_dead_p (2, operands[0])" [(set (pc) (match_dup 1))]) ;; Call subroutine, returning value in operand 0 @@ -12751,7 +12600,10 @@ (match_operand:SI 1 "register_no_elim_operand" "U") (match_operand:SI 2 "GOT32_symbol_operand")))) (match_operand 3)))] - "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "!TARGET_MACHO + && !TARGET_64BIT + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (insn)" { rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]); fnaddr = gen_const_mem (SImode, fnaddr); @@ -12772,7 +12624,7 @@ (call (mem:QI (match_operand:W 1 "memory_operand" "m")) (match_operand 2))) (unspec [(const_int 0)] UNSPEC_PEEPSIB)] - "!TARGET_X32" + "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) @@ -12782,7 +12634,9 @@ (set (match_operand 2) (call (mem:QI (match_dup 0)) (match_operand 3)))] - "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (1)) + "!TARGET_X32 + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))" [(parallel [(set (match_dup 2) @@ -12797,7 +12651,9 @@ (set (match_operand 2) (call (mem:QI (match_dup 0)) (match_operand 3)))] - "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (2)) + "!TARGET_X32 + && !TARGET_INDIRECT_BRANCH_REGISTER + && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))" [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) @@ -12822,7 +12678,7 @@ 
(define_insn "*call_value_pop" [(set (match_operand 0) - (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lmBz")) + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz")) (match_operand 2))) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -13032,12 +12888,11 @@ (define_insn "simple_return_internal" [(simple_return)] "reload_completed" - "%!ret" + "* return ix86_output_function_return (false);" [(set_attr "length" "1") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") - (set_attr "modrm" "0") - (set_attr "maybe_prefix_bnd" "1")]) + (set_attr "modrm" "0")]) (define_insn "interrupt_return" [(simple_return) @@ -13054,37 +12909,42 @@ [(simple_return) (unspec [(const_int 0)] UNSPEC_REP)] "reload_completed" -{ - if (ix86_bnd_prefixed_insn_p (insn)) - return "%!ret"; - - return "rep%; ret"; -} + "* return ix86_output_function_return (true);" [(set_attr "length" "2") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") (set_attr "prefix_rep" "1") (set_attr "modrm" "0")]) -(define_insn "simple_return_pop_internal" +(define_insn_and_split "simple_return_pop_internal" [(simple_return) (use (match_operand:SI 0 "const_int_operand"))] "reload_completed" "%!ret\t%0" + "&& cfun->machine->function_return_type != indirect_branch_keep" + [(const_int 0)] + "ix86_split_simple_return_pop_internal (operands[0]); DONE;" [(set_attr "length" "3") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "2") - (set_attr "modrm" "0") - (set_attr "maybe_prefix_bnd" "1")]) - -(define_insn "simple_return_indirect_internal" + (set_attr "modrm" "0")]) + +(define_expand "simple_return_indirect_internal" + [(parallel + [(simple_return) + (use (match_operand 0 "register_operand"))])]) + +(define_insn "*simple_return_indirect_internal<mode>" [(simple_return) - (use (match_operand:SI 0 "register_operand" "r"))] + (use (match_operand:W 0 "register_operand" "r"))] "reload_completed" - "%!jmp\t%A0" - [(set_attr "type" "ibr") - (set_attr "length_immediate" "0") - 
(set_attr "maybe_prefix_bnd" "1")]) + "* return ix86_output_indirect_function_return (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") + (const_string "multi") + (const_string "ibr"))) + (set_attr "length_immediate" "0")]) (define_insn "nop" [(const_int 0)] @@ -13187,7 +13047,6 @@ "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" [(set_attr "type" "lea") (set_attr "length_address" "4") - (set_attr "modrm_class" "unknown") (set_attr "mode" "DI")]) (define_insn "set_rip_rex64" @@ -13232,7 +13091,10 @@ stack address we wish to restore. */ tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD); - tmp = gen_rtx_MEM (Pmode, tmp); + /* Return address is always in word_mode. */ + tmp = gen_rtx_MEM (word_mode, tmp); + if (GET_MODE (ra) != word_mode) + ra = convert_to_mode (word_mode, ra, 1); emit_move_insn (tmp, ra); emit_jump_insn (gen_eh_return_internal ()); @@ -13724,6 +13586,43 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "<MODE>")]) +(define_insn "*bmi_blsr_<mode>_cmp" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1)) + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_dup 1) + (const_int -1)) + (match_dup 1)))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bmi_blsr_<mode>_ccz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1)) + (const_int 0))) + (clobber (match_scratch:SWI48 0 "=r"))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + 
;; BMI2 instructions. (define_expand "bmi2_bzhi_<mode>3" [(parallel @@ -14067,8 +13966,8 @@ && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ bswap\t%0 - movbe\t{%1, %0|%0, %1} - movbe\t{%1, %0|%0, %1}" + movbe{<imodesuffix>}\t{%1, %0|%0, %1} + movbe{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "bitmanip,imov,imov") (set_attr "modrm" "0,1,1") (set_attr "prefix_0f" "*,1,1") @@ -14084,26 +13983,58 @@ (set_attr "modrm" "0") (set_attr "mode" "<MODE>")]) -(define_insn "*bswaphi_lowpart_1" +(define_expand "bswaphi2" + [(set (match_operand:HI 0 "register_operand") + (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))] + "TARGET_MOVBE") + +(define_insn "*bswaphi2_movbe" + [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m") + (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))] + "TARGET_MOVBE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + movbe{w}\t{%1, %0|%0, %1} + movbe{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "*,1,1") + (set_attr "prefix_0f" "*,1,1") + (set_attr "prefix_extra" "*,1,1") + (set_attr "pent_pair" "np,*,*") + (set_attr "athlon_decode" "vector,*,*") + (set_attr "amdfam10_decode" "double,*,*") + (set_attr "bdver1_decode" "double,*,*") + (set_attr "mode" "QI,HI,HI")]) + +(define_peephole2 + [(set (match_operand:HI 0 "general_reg_operand") + (bswap:HI (match_dup 0)))] + "TARGET_MOVBE + && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn "bswaphi_lowpart" [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) (bswap:HI (match_dup 0))) (clobber (reg:CC FLAGS_REG))] - "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)" + "" "@ xchg{b}\t{%h0, %b0|%b0, %h0} rol{w}\t{$8, %0|%0, 8}" - [(set_attr "length" "2,4") + [(set (attr "preferred_for_size") + (cond [(eq_attr 
"alternative" "0") + (symbol_ref "true")] + (symbol_ref "false"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "0") + (symbol_ref "TARGET_USE_XCHGB")] + (symbol_ref "!TARGET_USE_XCHGB"))) + (set_attr "length" "2,4") (set_attr "mode" "QI,HI")]) -(define_insn "bswaphi_lowpart" - [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) - (bswap:HI (match_dup 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "rol{w}\t{$8, %0|%0, 8}" - [(set_attr "length" "4") - (set_attr "mode" "HI")]) - (define_expand "paritydi2" [(set (match_operand:DI 0 "register_operand") (parity:DI (match_operand:DI 1 "register_operand")))] @@ -14273,7 +14204,18 @@ "TARGET_64BIT" { if (!TARGET_X32) - fputs (ASM_BYTE "0x66\n", asm_out_file); + /* The .loc directive has effect for 'the immediately following assembly + instruction'. So for a sequence: + .loc f l + .byte x + insn1 + the 'immediately following assembly instruction' is insn1. + We want to emit an insn prefix here, but if we use .byte (as shown in + 'ELF Handling For Thread-Local Storage'), a preceding .loc will point + inside the insn sequence, rather than to the start. After relaxation + of the sequence by the linker, the .loc might point inside an insn. + Use data16 prefix instead, which doesn't have this problem. */ + fputs ("\tdata16", asm_out_file); output_asm_insn ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) @@ -14681,6 +14623,20 @@ ;; Gcc is slightly more smart about handling normal two address instructions ;; so use special patterns for add and mull. 
+(define_insn "*fop_xf_comm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + (define_insn "*fop_<mode>_comm" [(set (match_operand:MODEF 0 "register_operand" "=f,x,v") (match_operator:MODEF 3 "binary_fp_operator" @@ -14716,17 +14672,33 @@ (symbol_ref "false"))))]) (define_insn "*rcpsf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + [(set (match_operand:SF 0 "register_operand" "=x,x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] UNSPEC_RCP))] "TARGET_SSE && TARGET_SSE_MATH" - "%vrcpss\t{%1, %d0|%d0, %1}" + "@ + %vrcpss\t{%d1, %0|%0, %d1} + %vrcpss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) +(define_insn "*fop_xf_1_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] + "TARGET_80387 + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "div_operator") + (const_string "fdiv") + (const_string "fop"))) + (set_attr "mode" "XF")]) + (define_insn "*fop_<mode>_1" [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v") (match_operator:MODEF 3 "binary_fp_operator" @@ -14763,49 +14735,65 @@ (symbol_ref "true") (symbol_ref "false"))))]) -;; ??? Add SSE splitters for these! 
-(define_insn "*fop_<MODEF:mode>_2_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f") - (match_operator:MODEF 3 "binary_fp_operator" - [(float:MODEF +(define_insn "*fop_<X87MODEF:mode>_2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (match_operator:X87MODEF 3 "binary_fp_operator" + [(float:X87MODEF (match_operand:SWI24 1 "nonimmediate_operand" "m")) - (match_operand:MODEF 2 "register_operand" "0")]))] - "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode) - && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) + (match_operand:X87MODEF 2 "register_operand" "0")]))] + "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode) + && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH) && (TARGET_USE_<SWI24:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:MODEF 3 "mult_operator") - (const_string "fmul") - (match_operand:MODEF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) + (cond [(match_operand:X87MODEF 3 "mult_operator") + (const_string "fmul") + (match_operand:X87MODEF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) (set_attr "fp_int_src" "true") (set_attr "mode" "<SWI24:MODE>")]) -(define_insn "*fop_<MODEF:mode>_3_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f") - (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "register_operand" "0") - (float:MODEF +(define_insn "*fop_<X87MODEF:mode>_3_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (match_operator:X87MODEF 3 "binary_fp_operator" + [(match_operand:X87MODEF 1 "register_operand" "0") + (float:X87MODEF (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))] - "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode) - && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) + "TARGET_80387 && X87_ENABLE_FLOAT 
(<X87MODEF:MODE>mode, <SWI24:MODE>mode) + && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH) && (TARGET_USE_<SWI24:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:MODEF 3 "mult_operator") - (const_string "fmul") - (match_operand:MODEF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) + (cond [(match_operand:X87MODEF 3 "mult_operator") + (const_string "fmul") + (match_operand:X87MODEF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) (set_attr "fp_int_src" "true") (set_attr "mode" "<MODE>")]) +(define_insn "*fop_xf_4_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) + (match_operand:XF 2 "register_operand" "0,f")]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator") + (const_string "fmul") + (match_operand:XF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "<MODE>")]) + (define_insn "*fop_df_4_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" @@ -14816,14 +14804,31 @@ && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator") - (const_string "fmul") - (match_operand:DF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) + (cond [(match_operand:DF 3 "mult_operator") + (const_string "fmul") + (match_operand:DF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) (set_attr "mode" "SF")]) +(define_insn "*fop_xf_5_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + 
(float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator") + (const_string "fmul") + (match_operand:XF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "<MODE>")]) + (define_insn "*fop_df_5_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" @@ -14834,14 +14839,32 @@ && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator") - (const_string "fmul") - (match_operand:DF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) + (cond [(match_operand:DF 3 "mult_operator") + (const_string "fmul") + (match_operand:DF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) (set_attr "mode" "SF")]) +(define_insn "*fop_xf_6_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0,f")) + (float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator") + (const_string "fmul") + (match_operand:XF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "<MODE>")]) + (define_insn "*fop_df_6_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" @@ -14853,131 +14876,13 @@ && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator") - (const_string "fmul") - (match_operand:DF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) + (cond 
[(match_operand:DF 3 "mult_operator") + (const_string "fmul") + (match_operand:DF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) (set_attr "mode" "SF")]) - -(define_insn "*fop_xf_comm_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "%0") - (match_operand:XF 2 "register_operand" "f")]))] - "TARGET_80387 - && COMMUTATIVE_ARITH_P (operands[3])" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:XF 3 "mult_operator") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "XF")]) - -(define_insn "*fop_xf_1_i387" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (match_operand:XF 2 "register_operand" "f,0")]))] - "TARGET_80387 - && !COMMUTATIVE_ARITH_P (operands[3])" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:XF 3 "div_operator") - (const_string "fdiv") - (const_string "fop"))) - (set_attr "mode" "XF")]) - -(define_insn "*fop_xf_2_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (match_operator:XF 3 "binary_fp_operator" - [(float:XF - (match_operand:SWI24 1 "nonimmediate_operand" "m")) - (match_operand:XF 2 "register_operand" "0")]))] - "TARGET_80387 - && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator") - (const_string "fmul") - (match_operand:XF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fop_xf_3_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0") - (float:XF - 
(match_operand:SWI24 2 "nonimmediate_operand" "m"))]))] - "TARGET_80387 - && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator") - (const_string "fmul") - (match_operand:XF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fop_xf_4_i387" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF - (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) - (match_operand:XF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator") - (const_string "fmul") - (match_operand:XF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "<MODE>")]) - -(define_insn "*fop_xf_5_i387" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (float_extend:XF - (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator") - (const_string "fmul") - (match_operand:XF 3 "div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "<MODE>")]) - -(define_insn "*fop_xf_6_i387" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF - (match_operand:MODEF 1 "register_operand" "0,f")) - (float_extend:XF - (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator") - (const_string "fmul") - (match_operand:XF 3 
"div_operator") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "<MODE>")]) ;; FPU special functions. @@ -14985,7 +14890,7 @@ ;; all fancy i386 XFmode math functions. (define_insn "truncxf<mode>2_i387_noop_unspec" - [(set (match_operand:MODEF 0 "register_operand" "=f") + [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf") (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] UNSPEC_TRUNC_NOOP))] "TARGET_USE_FANCY_MATH_387" @@ -15004,25 +14909,14 @@ (set_attr "amdfam10_decode" "direct") (set_attr "bdver1_decode" "direct")]) -(define_insn "sqrt_extend<mode>xf2_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF - (float_extend:XF - (match_operand:MODEF 1 "register_operand" "0"))))] - "TARGET_USE_FANCY_MATH_387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct") - (set_attr "amdfam10_decode" "direct") - (set_attr "bdver1_decode" "direct")]) - (define_insn "*rsqrtsf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + [(set (match_operand:SF 0 "register_operand" "=x,x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] UNSPEC_RSQRT))] "TARGET_SSE && TARGET_SSE_MATH" - "%vrsqrtss\t{%1, %d0|%d0, %1}" + "@ + %vrsqrtss\t{%d1, %0|%0, %d1} + %vrsqrtss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") @@ -15040,11 +14934,13 @@ }) (define_insn "*sqrt<mode>2_sse" - [(set (match_operand:MODEF 0 "register_operand" "=v") + [(set (match_operand:MODEF 0 "register_operand" "=v,v") (sqrt:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" - "%vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}" + "@ + %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1} + %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr 
"atom_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt") @@ -15075,20 +14971,30 @@ if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) { rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = force_reg (<MODE>mode, operands[1]); - - emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1)); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_sqrtxf2 (op0, op1)); emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0)); DONE; } }) +(define_insn "x86_fnstsw_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] + "TARGET_80387" + "fnstsw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "SI") + (set_attr "unit" "i387")]) + (define_insn "fpremxf4_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FPREM_F)) - (set (match_operand:XF 1 "register_operand" "=u") + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM_U)) (set (reg:CCFP FPSR_REG) @@ -15163,7 +15069,7 @@ (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FPREM1_F)) - (set (match_operand:XF 1 "register_operand" "=u") + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM1_U)) (set (reg:CCFP FPSR_REG) @@ -15242,7 +15148,7 @@ [(UNSPEC_SIN "sin") (UNSPEC_COS "cos")]) -(define_insn "*<sincos>xf2_i387" +(define_insn "<sincos>xf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] SINCOS))] @@ -15253,31 +15159,29 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "*<sincos>_extend<mode>xf2_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 1 "register_operand" "0"))] - SINCOS))] +(define_expand 
"<sincos><mode>2" + [(set (match_operand:MODEF 0 "register_operand") + (unspec:MODEF [(match_operand:MODEF 1 "general_operand")] + SINCOS))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" - "f<sincos>" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - -;; When sincos pattern is defined, sin and cos builtin functions will be -;; expanded to sincos pattern with one of its outputs left unused. -;; CSE pass will figure out if two sincos patterns can be combined, -;; otherwise sincos pattern will be split back to sin or cos pattern, -;; depending on the unused output. +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_<sincos>xf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + DONE; +}) (define_insn "sincosxf3" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand" "=u") + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" @@ -15286,70 +15190,10 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_split - [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(match_operand:XF 2 "register_operand")] - UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand") - (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] - "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && can_create_pseudo_p ()" - [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]) - -(define_split - [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(match_operand:XF 2 "register_operand")] - UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand") - (unspec:XF 
[(match_dup 2)] UNSPEC_SINCOS_SIN))] - "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && can_create_pseudo_p ()" - [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]) - -(define_insn "sincos_extend<mode>xf3_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 2 "register_operand" "0"))] - UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand" "=u") - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fsincos" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - -(define_split - [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 2 "register_operand"))] - UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand") - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] - "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && can_create_pseudo_p ()" - [(set (match_dup 1) - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]) - -(define_split - [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 2 "register_operand"))] - UNSPEC_SINCOS_COS)) - (set (match_operand:XF 1 "register_operand") - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] - "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && can_create_pseudo_p ()" - [(set (match_dup 0) - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]) - (define_expand "sincos<mode>3" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "register_operand")) - (use (match_operand:MODEF 2 "register_operand"))] + (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || 
TARGET_MIX_SSE_I387) @@ -15357,39 +15201,23 @@ { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2])); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1)); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op2, operands[2])); + emit_insn (gen_sincosxf3 (op0, op1, op2)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[1], op1)); DONE; }) (define_insn "fptanxf4_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (match_operand:XF 3 "const_double_operand" "F")) - (set (match_operand:XF 1 "register_operand" "=u") + [(set (match_operand:SF 0 "register_operand" "=f") + (match_operand:SF 3 "const1_operand")) + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_TAN))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations - && standard_80387_constant_p (operands[3]) == 2" - "fptan" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - -(define_insn "fptan_extend<mode>xf4_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f") - (match_operand:MODEF 3 "const_double_operand" "F")) - (set (match_operand:XF 1 "register_operand" "=u") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 2 "register_operand" "0"))] - UNSPEC_TAN))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations - && standard_80387_constant_p (operands[3]) == 2" + && flag_unsafe_math_optimizations" "fptan" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") @@ -15401,36 +15229,33 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx one = gen_reg_rtx (XFmode); - rtx op2 = CONST1_RTX (XFmode); /* fld1 */ - - emit_insn (gen_fptanxf4_i387 
(one, operands[0], operands[1], op2)); + rtx one = gen_reg_rtx (SFmode); + emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], + CONST1_RTX (SFmode))); DONE; }) (define_expand "tan<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); - - rtx one = gen_reg_rtx (<MODE>mode); - rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */ - - emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0, - operands[1], op2)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); - DONE; -}) - -(define_insn "*fpatanxf3_i387" + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_tanxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); + DONE; +}) + +(define_insn "atan2xf3" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") - (match_operand:XF 2 "register_operand" "u")] + (match_operand:XF 2 "register_operand" "f")] UNSPEC_FPATAN)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 @@ -15440,45 +15265,24 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "fpatan_extend<mode>xf3_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 1 "register_operand" "0")) - (float_extend:XF - (match_operand:MODEF 2 "register_operand" "u"))] - UNSPEC_FPATAN)) - (clobber (match_scratch:XF 3 "=2"))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fpatan" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - -(define_expand "atan2xf3" - [(parallel [(set 
(match_operand:XF 0 "register_operand") - (unspec:XF [(match_operand:XF 2 "register_operand") - (match_operand:XF 1 "register_operand")] - UNSPEC_FPATAN)) - (clobber (match_scratch:XF 3))])] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations") - (define_expand "atan2<mode>3" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand")) - (use (match_operand:MODEF 2 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand")) + (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); - - emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1])); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op2, operands[2])); + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + + emit_insn (gen_atan2xf3 (op0, op2, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15490,26 +15294,22 @@ (clobber (match_scratch:XF 3))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (XFmode); - emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ -}) + "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));") (define_expand "atan<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); - - rtx op2 = gen_reg_rtx (<MODE>mode); - emit_move_insn (op2, CONST1_RTX (<MODE>mode)); /* fld1 */ - - emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1])); - emit_insn 
(gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_atanxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15531,7 +15331,7 @@ for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); - emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ + emit_move_insn (operands[3], CONST1_RTX (XFmode)); }) (define_expand "asin<mode>2" @@ -15547,7 +15347,7 @@ emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_asinxf2 (op0, op1)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15569,7 +15369,7 @@ for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); - emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */ + emit_move_insn (operands[3], CONST1_RTX (XFmode)); }) (define_expand "acos<mode>2" @@ -15585,14 +15385,14 @@ emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_acosxf2 (op0, op1)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) (define_insn "fyl2xxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") - (match_operand:XF 2 "register_operand" "u")] + (match_operand:XF 2 "register_operand" "f")] UNSPEC_FYL2X)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 @@ -15602,22 +15402,6 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "fyl2x_extend<mode>xf3_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 1 "register_operand" "0")) - (match_operand:XF 2 "register_operand" "u")] - UNSPEC_FYL2X)) - (clobber (match_scratch:XF 3 "=2"))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && 
flag_unsafe_math_optimizations" - "fyl2x" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - (define_expand "logxf2" [(parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_operand:XF 1 "register_operand") @@ -15626,25 +15410,24 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - operands[2] = gen_reg_rtx (XFmode); - emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */ + operands[2] + = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ }) (define_expand "log<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); - - rtx op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */ - - emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_logxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15656,25 +15439,24 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - operands[2] = gen_reg_rtx (XFmode); - emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */ + operands[2] + = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */ }) (define_expand "log10<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx 
(XFmode); - - rtx op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */ - - emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_log10xf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15685,33 +15467,29 @@ (clobber (match_scratch:XF 3))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (XFmode); - emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ -}) + "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));") (define_expand "log2<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); - - rtx op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ - - emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_log2xf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) (define_insn "fyl2xp1xf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") - (match_operand:XF 2 "register_operand" "u")] + (match_operand:XF 2 "register_operand" "f")] UNSPEC_FYL2XP1)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 @@ -15721,22 +15499,6 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "fyl2xp1_extend<mode>xf3_i387" - [(set (match_operand:XF 0 "register_operand" 
"=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 1 "register_operand" "0")) - (match_operand:XF 2 "register_operand" "u")] - UNSPEC_FYL2XP1)) - (clobber (match_scratch:XF 3 "=2"))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fyl2xp1" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - (define_expand "log1pxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] @@ -15749,20 +15511,18 @@ (define_expand "log1p<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0; - - op0 = gen_reg_rtx (XFmode); - - operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]); - - ix86_emit_i387_log1p (op0, operands[1]); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_log1pxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15770,7 +15530,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_XTRACT_FRACT)) - (set (match_operand:XF 1 "register_operand" "=u") + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" @@ -15779,22 +15539,6 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "fxtract_extend<mode>xf3_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(float_extend:XF - (match_operand:MODEF 2 "register_operand" "0"))] - 
UNSPEC_XTRACT_FRACT)) - (set (match_operand:XF 1 "register_operand" "=u") - (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fxtract" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "XF")]) - (define_expand "logbxf2" [(parallel [(set (match_dup 2) (unspec:XF [(match_operand:XF 1 "register_operand")] @@ -15807,7 +15551,7 @@ (define_expand "logb<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -15816,8 +15560,9 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1)); + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_logbxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op1)); DONE; }) @@ -15842,21 +15587,23 @@ (define_expand "ilogb<mode>2" [(use (match_operand:SI 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1; + rtx op0, op1, op2; if (optimize_insn_for_size_p ()) FAIL; op0 = gen_reg_rtx (XFmode); op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); + op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op2, operands[1])); + emit_insn (gen_fxtractxf3_i387 (op0, op1, op2)); emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); DONE; }) @@ -15877,7 +15624,7 @@ (unspec:XF 
[(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FSCALE_FRACT)) - (set (match_operand:XF 1 "register_operand" "=u") + (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FSCALE_EXP))] "TARGET_USE_FANCY_MATH_387 @@ -15908,7 +15655,7 @@ for (i = 3; i < 10; i++) operands[i] = gen_reg_rtx (XFmode); - emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ + emit_move_insn (operands[7], CONST1_RTX (XFmode)); }) (define_expand "expxf2" @@ -15917,10 +15664,7 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx op2; - - op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */ + rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; @@ -15934,14 +15678,12 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_expxf2 (op0, op1)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15951,10 +15693,7 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx op2; - - op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */ + rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; @@ -15968,14 +15707,12 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_exp10xf2 (op0, op1)); - 
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -15985,10 +15722,7 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx op2; - - op2 = gen_reg_rtx (XFmode); - emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode)); emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; @@ -16002,14 +15736,12 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_exp2xf2 (op0, op1)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -16018,7 +15750,6 @@ (match_dup 2))) (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) - (set (match_dup 9) (float_extend:XF (match_dup 13))) (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) (parallel [(set (match_dup 7) (unspec:XF [(match_dup 6) (match_dup 4)] @@ -16032,8 +15763,7 @@ (set (match_dup 11) (unspec:XF [(match_dup 9) (match_dup 8)] UNSPEC_FSCALE_EXP))]) - (set (match_dup 12) (minus:XF (match_dup 10) - (float_extend:XF (match_dup 13)))) + (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9))) (set (match_operand:XF 0 "register_operand") (plus:XF (match_dup 12) (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 @@ -16044,10 +15774,8 @@ for (i = 2; i < 13; i++) operands[i] = gen_reg_rtx (XFmode); - operands[13] - = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */ - emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */ + emit_move_insn (operands[9], CONST1_RTX (XFmode)); }) (define_expand "expm1<mode>2" @@ -16058,14 +15786,12 @@ || TARGET_MIX_SSE_I387) && 
flag_unsafe_math_optimizations" { - rtx op0, op1; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_expm1xf2 (op0, op1)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -16076,10 +15802,8 @@ "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx tmp1, tmp2; - - tmp1 = gen_reg_rtx (XFmode); - tmp2 = gen_reg_rtx (XFmode); + rtx tmp1 = gen_reg_rtx (XFmode); + rtx tmp2 = gen_reg_rtx (XFmode); emit_insn (gen_floatsixf2 (tmp1, operands[2])); emit_insn (gen_fscalexf4_i387 (operands[0], tmp2, @@ -16096,14 +15820,12 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -16117,9 +15839,7 @@ UNSPEC_FSCALE_EXP))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - operands[3] = gen_reg_rtx (XFmode); -}) + "operands[3] = gen_reg_rtx (XFmode);") (define_expand "scalb<mode>3" [(use (match_operand:MODEF 0 "register_operand")) @@ -16130,16 +15850,14 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { - rtx op0, op1, op2; - - op0 = gen_reg_rtx (XFmode); - op1 = gen_reg_rtx (XFmode); - op2 = gen_reg_rtx (XFmode); + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_extend<mode>xf2 (op2, operands[2])); emit_insn (gen_scalbxf3 (op0, op1, op2)); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn 
(gen_truncxf<mode>2 (operands[0], op0)); DONE; }) @@ -16155,7 +15873,7 @@ (define_expand "significand<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] + (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16164,15 +15882,16 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_significandxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2 (operands[0], op0)); DONE; }) (define_insn "sse4_1_round<mode>2" [(set (match_operand:MODEF 0 "register_operand" "=x,v") - (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x,v") + (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm") (match_operand:SI 2 "const_0_to_15_operand" "n,n")] UNSPEC_ROUND))] "TARGET_SSE4_1" @@ -16196,22 +15915,10 @@ (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")]) -(define_insn "rint<mode>2_frndint" - [(set (match_operand:MODEF 0 "register_operand" "=f") - (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "0")] - UNSPEC_FRNDINT))] - "TARGET_USE_FANCY_MATH_387" - "frndint" - [(set_attr "type" "fpspc") - (set_attr "znver1_decode" "vector") - (set_attr "mode" "<MODE>")]) - (define_expand "rint<mode>2" [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] - "(TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387)) + (use (match_operand:MODEF 1 "nonimmediate_operand"))] + "TARGET_USE_FANCY_MATH_387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) @@ -16223,7 +15930,46 @@ ix86_expand_rint (operands[0], operands[1]); } else - 
emit_insn (gen_rint<mode>2_frndint (operands[0], operands[1])); + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_rintxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0)); + } + DONE; +}) + +(define_expand "nearbyintxf2" + [(set (match_operand:XF 0 "register_operand") + (unspec:XF [(match_operand:XF 1 "register_operand")] + UNSPEC_FRNDINT))] + "TARGET_USE_FANCY_MATH_387 + && !flag_trapping_math") + +(define_expand "nearbyint<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "nonimmediate_operand"))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && !flag_trapping_math) + || (TARGET_SSE4_1 && TARGET_SSE_MATH)" +{ + if (TARGET_SSE4_1 && TARGET_SSE_MATH) + emit_insn (gen_sse4_1_round<mode>2 + (operands[0], operands[1], GEN_INT (ROUND_MXCSR + | ROUND_NO_EXC))); + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_nearbyintxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0)); + } DONE; }) @@ -16233,7 +15979,8 @@ "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) + && flag_unsafe_math_optimizations + && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math && !flag_rounding_math)" { @@ -16258,91 +16005,18 @@ DONE; }) -(define_insn_and_split "*fistdi2_1" - [(set (match_operand:DI 0 "nonimmediate_operand") - (unspec:DI [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST))] - "TARGET_USE_FANCY_MATH_387 - && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fistdi2 
(operands[0], operands[1])); - else - { - operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); - emit_insn (gen_fistdi2_with_temp (operands[0], operands[1], - operands[2])); - } - DONE; -} - [(set_attr "type" "fpspc") - (set_attr "mode" "DI")]) - -(define_insn "fistdi2" - [(set (match_operand:DI 0 "memory_operand" "=m") +(define_insn "lrintxfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m") (unspec:DI [(match_operand:XF 1 "register_operand" "f")] UNSPEC_FIST)) - (clobber (match_scratch:XF 2 "=&1f"))] + (clobber (match_scratch:XF 2 "=&f"))] "TARGET_USE_FANCY_MATH_387" "* return output_fix_trunc (insn, operands, false);" [(set_attr "type" "fpspc") (set_attr "mode" "DI")]) -(define_insn "fistdi2_with_temp" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] - UNSPEC_FIST)) - (clobber (match_operand:DI 2 "memory_operand" "=X,m")) - (clobber (match_scratch:XF 3 "=&1f,&1f"))] - "TARGET_USE_FANCY_MATH_387" - "#" - [(set_attr "type" "fpspc") - (set_attr "mode" "DI")]) - -(define_split - [(set (match_operand:DI 0 "register_operand") - (unspec:DI [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST)) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch 3))] - "reload_completed" - [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) - (clobber (match_dup 3))]) - (set (match_dup 0) (match_dup 2))]) - -(define_split - [(set (match_operand:DI 0 "memory_operand") - (unspec:DI [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST)) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch 3))] - "reload_completed" - [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) - (clobber (match_dup 3))])]) - -(define_insn_and_split "*fist<mode>2_1" - [(set (match_operand:SWI24 0 "register_operand") - (unspec:SWI24 [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST))] - "TARGET_USE_FANCY_MATH_387 - && 
can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); - emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1], - operands[2])); - DONE; -} - [(set_attr "type" "fpspc") - (set_attr "mode" "<MODE>")]) - -(define_insn "fist<mode>2" - [(set (match_operand:SWI24 0 "memory_operand" "=m") +(define_insn "lrintxf<mode>2" + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m") (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")] UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387" @@ -16350,39 +16024,6 @@ [(set_attr "type" "fpspc") (set_attr "mode" "<MODE>")]) -(define_insn "fist<mode>2_with_temp" - [(set (match_operand:SWI24 0 "register_operand" "=r") - (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")] - UNSPEC_FIST)) - (clobber (match_operand:SWI24 2 "memory_operand" "=m"))] - "TARGET_USE_FANCY_MATH_387" - "#" - [(set_attr "type" "fpspc") - (set_attr "mode" "<MODE>")]) - -(define_split - [(set (match_operand:SWI24 0 "register_operand") - (unspec:SWI24 [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST)) - (clobber (match_operand:SWI24 2 "memory_operand"))] - "reload_completed" - [(set (match_dup 2) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST)) - (set (match_dup 0) (match_dup 2))]) - -(define_split - [(set (match_operand:SWI24 0 "memory_operand") - (unspec:SWI24 [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST)) - (clobber (match_operand:SWI24 2 "memory_operand"))] - "reload_completed" - [(set (match_dup 0) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST))]) - -(define_expand "lrintxf<mode>2" - [(set (match_operand:SWI248x 0 "nonimmediate_operand") - (unspec:SWI248x [(match_operand:XF 1 "register_operand")] - UNSPEC_FIST))] - "TARGET_USE_FANCY_MATH_387") - (define_expand "lrint<MODEF:mode><SWI48:mode>2" [(set (match_operand:SWI48 0 "nonimmediate_operand") (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")] @@ -16446,9 +16087,9 @@ (UNSPEC_FIST_CEIL "CEIL")]) ;; Rounding mode 
control word calculation could clobber FLAGS_REG. -(define_insn_and_split "frndint<mode>2_<rounding>" - [(set (match_operand:X87MODEF 0 "register_operand") - (unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand")] +(define_insn_and_split "frndintxf2_<rounding>" + [(set (match_operand:XF 0 "register_operand") + (unspec:XF [(match_operand:XF 1 "register_operand")] FRNDINT_ROUNDING)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -16463,18 +16104,18 @@ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>); - emit_insn (gen_frndint<mode>2_<rounding>_i387 (operands[0], operands[1], - operands[2], operands[3])); + emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1], + operands[2], operands[3])); DONE; } [(set_attr "type" "frndint") (set_attr "i387_cw" "<rounding>") - (set_attr "mode" "<MODE>")]) - -(define_insn "frndint<mode>2_<rounding>_i387" - [(set (match_operand:X87MODEF 0 "register_operand" "=f") - (unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand" "0")] - FRNDINT_ROUNDING)) + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_<rounding>_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + FRNDINT_ROUNDING)) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] "TARGET_USE_FANCY_MATH_387 @@ -16482,7 +16123,7 @@ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" [(set_attr "type" "frndint") (set_attr "i387_cw" "<rounding>") - (set_attr "mode" "<MODE>")]) + (set_attr "mode" "XF")]) (define_expand "<rounding_insn>xf2" [(parallel [(set (match_operand:XF 0 "register_operand") @@ -16502,11 +16143,11 @@ || TARGET_MIX_SSE_I387) && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && (TARGET_SSE4_1 || !flag_trapping_math - || flag_fp_int_builtin_inexact))" + && (TARGET_SSE4_1 || 
flag_fp_int_builtin_inexact + || !flag_trapping_math))" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && (TARGET_SSE4_1 || !flag_trapping_math || flag_fp_int_builtin_inexact)) + && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math)) { if (TARGET_SSE4_1) emit_insn (gen_sse4_1_round<mode>2 @@ -16536,72 +16177,14 @@ } } else - emit_insn (gen_frndint<mode>2_<rounding> (operands[0], operands[1])); - DONE; -}) - -;; Rounding mode control word calculation could clobber FLAGS_REG. -(define_insn_and_split "frndintxf2_mask_pm" - [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(match_operand:XF 1 "register_operand")] - UNSPEC_FRNDINT_MASK_PM)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations - && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - ix86_optimize_mode_switching[I387_MASK_PM] = 1; - - operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); - operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); - - emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -} - [(set_attr "type" "frndint") - (set_attr "i387_cw" "mask_pm") - (set_attr "mode" "XF")]) - -(define_insn "frndintxf2_mask_pm_i387" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT_MASK_PM)) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" - [(set_attr "type" "frndint") - (set_attr "i387_cw" "mask_pm") - (set_attr "mode" "XF")]) - -(define_expand "nearbyintxf2" - [(parallel [(set (match_operand:XF 0 "register_operand") - (unspec:XF [(match_operand:XF 1 "register_operand")] - UNSPEC_FRNDINT_MASK_PM)) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations") - 
-(define_expand "nearbyint<mode>2" - [(use (match_operand:MODEF 0 "register_operand")) - (use (match_operand:MODEF 1 "register_operand"))] - "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" -{ - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extend<mode>xf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_mask_pm (op0, op1)); - - emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_<rounding> (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0)); + } DONE; }) @@ -16622,16 +16205,9 @@ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); - emit_insn (gen_fist<mode>2_<rounding>_with_temp - (operands[0], operands[1], operands[2], - operands[3], operands[4])); - } + + emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1], + operands[2], operands[3])); DONE; } [(set_attr "type" "fistp") @@ -16639,12 +16215,12 @@ (set_attr "mode" "<MODE>")]) (define_insn "fistdi2_<rounding>" - [(set (match_operand:DI 0 "memory_operand" "=m") + [(set (match_operand:DI 0 "nonimmediate_operand" "=m") (unspec:DI [(match_operand:XF 1 "register_operand" "f")] FIST_ROUNDING)) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m")) - (clobber (match_scratch:XF 4 "=&1f"))] + (clobber (match_scratch:XF 4 "=&f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "* return output_fix_trunc (insn, operands, false);" @@ 
-16652,54 +16228,8 @@ (set_attr "i387_cw" "<rounding>") (set_attr "mode" "DI")]) -(define_insn "fistdi2_<rounding>_with_temp" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=X,m")) - (clobber (match_scratch:XF 5 "=&1f,&1f"))] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "<rounding>") - (set_attr "mode" "DI")]) - -(define_split - [(set (match_operand:DI 0 "register_operand") - (unspec:DI [(match_operand:XF 1 "register_operand")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:DI 4 "memory_operand")) - (clobber (match_scratch 5))] - "reload_completed" - [(parallel [(set (match_dup 4) - (unspec:DI [(match_dup 1)] FIST_ROUNDING)) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))]) - -(define_split - [(set (match_operand:DI 0 "memory_operand") - (unspec:DI [(match_operand:XF 1 "register_operand")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:DI 4 "memory_operand")) - (clobber (match_scratch 5))] - "reload_completed" - [(parallel [(set (match_dup 0) - (unspec:DI [(match_dup 1)] FIST_ROUNDING)) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))])]) - (define_insn "fist<mode>2_<rounding>" - [(set (match_operand:SWI24 0 "memory_operand" "=m") + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m") (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")] FIST_ROUNDING)) (use (match_operand:HI 2 "memory_operand" "m")) @@ -16711,47 +16241,6 @@ (set_attr "i387_cw" "<rounding>") (set_attr "mode" 
"<MODE>")]) -(define_insn "fist<mode>2_<rounding>_with_temp" - [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r") - (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f,f")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "<rounding>") - (set_attr "mode" "<MODE>")]) - -(define_split - [(set (match_operand:SWI24 0 "register_operand") - (unspec:SWI24 [(match_operand:XF 1 "register_operand")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:SWI24 4 "memory_operand"))] - "reload_completed" - [(parallel [(set (match_dup 4) - (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING)) - (use (match_dup 2)) - (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))]) - -(define_split - [(set (match_operand:SWI24 0 "memory_operand") - (unspec:SWI24 [(match_operand:XF 1 "register_operand")] - FIST_ROUNDING)) - (use (match_operand:HI 2 "memory_operand")) - (use (match_operand:HI 3 "memory_operand")) - (clobber (match_operand:SWI24 4 "memory_operand"))] - "reload_completed" - [(parallel [(set (match_dup 0) - (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING)) - (use (match_dup 2)) - (use (match_dup 3))])]) - (define_expand "l<rounding_insn>xf<mode>2" [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand") (unspec:SWI248x [(match_operand:XF 1 "register_operand")] @@ -16767,12 +16256,19 @@ FIST_ROUNDING)) (clobber (reg:CC FLAGS_REG))])] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math" -{ - if (TARGET_64BIT && optimize_insn_for_size_p ()) - FAIL; - - if (ROUND_<ROUNDING> == ROUND_FLOOR) + && (TARGET_SSE4_1 || !flag_trapping_math)" +{ + if (TARGET_SSE4_1) + { + rtx tmp = gen_reg_rtx (<MODEF:MODE>mode); + 
+ emit_insn (gen_sse4_1_round<mode>2 + (tmp, operands[1], GEN_INT (ROUND_<ROUNDING> + | ROUND_NO_EXC))); + emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2 + (operands[0], tmp)); + } + else if (ROUND_<ROUNDING> == ROUND_FLOOR) ix86_expand_lfloorceil (operands[0], operands[1], true); else if (ROUND_<ROUNDING> == ROUND_CEIL) ix86_expand_lfloorceil (operands[0], operands[1], false); @@ -16794,81 +16290,6 @@ (set_attr "unit" "i387") (set_attr "mode" "<MODE>")]) -(define_insn_and_split "fxam<mode>2_i387_with_temp" - [(set (match_operand:HI 0 "register_operand") - (unspec:HI - [(match_operand:MODEF 1 "memory_operand")] - UNSPEC_FXAM_MEM))] - "TARGET_USE_FANCY_MATH_387 - && can_create_pseudo_p ()" - "#" - "&& 1" - [(set (match_dup 2)(match_dup 1)) - (set (match_dup 0) - (unspec:HI [(match_dup 2)] UNSPEC_FXAM))] -{ - operands[2] = gen_reg_rtx (<MODE>mode); - - MEM_VOLATILE_P (operands[1]) = 1; -} - [(set_attr "type" "multi") - (set_attr "unit" "i387") - (set_attr "mode" "<MODE>")]) - -(define_expand "isinfxf2" - [(use (match_operand:SI 0 "register_operand")) - (use (match_operand:XF 1 "register_operand"))] - "TARGET_USE_FANCY_MATH_387 - && ix86_libc_has_function (function_c99_misc)" -{ - rtx mask = GEN_INT (0x45); - rtx val = GEN_INT (0x05); - - rtx scratch = gen_reg_rtx (HImode); - rtx res = gen_reg_rtx (QImode); - - emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); - - emit_insn (gen_andqi_ext_1 (scratch, scratch, mask)); - emit_insn (gen_cmpqi_ext_3 (scratch, val)); - ix86_expand_setcc (res, EQ, - gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); - emit_insn (gen_zero_extendqisi2 (operands[0], res)); - DONE; -}) - -(define_expand "isinf<mode>2" - [(use (match_operand:SI 0 "register_operand")) - (use (match_operand:MODEF 1 "nonimmediate_operand"))] - "TARGET_USE_FANCY_MATH_387 - && ix86_libc_has_function (function_c99_misc) - && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" -{ - rtx mask = GEN_INT (0x45); - rtx val = GEN_INT (0x05); - - rtx scratch = gen_reg_rtx 
(HImode); - rtx res = gen_reg_rtx (QImode); - - /* Remove excess precision by forcing value through memory. */ - if (memory_operand (operands[1], VOIDmode)) - emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1])); - else - { - rtx temp = assign_386_stack_local (<MODE>mode, SLOT_TEMP); - - emit_move_insn (temp, operands[1]); - emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp)); - } - - emit_insn (gen_andqi_ext_1 (scratch, scratch, mask)); - emit_insn (gen_cmpqi_ext_3 (scratch, val)); - ix86_expand_setcc (res, EQ, - gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); - emit_insn (gen_zero_extendqisi2 (operands[0], res)); - DONE; -}) - (define_expand "signbittf2" [(use (match_operand:SI 0 "register_operand")) (use (match_operand:TF 1 "register_operand"))] @@ -17721,14 +17142,9 @@ (clobber (reg:CC FLAGS_REG))] "" "sbb{<imodesuffix>}\t%0, %0" - ; Since we don't have the proper number of operands for an alu insn, - ; fill in all the blanks. - [(set_attr "type" "alu") - (set_attr "modrm_class" "op0") + [(set_attr "type" "alu1") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") - (set_attr "memory" "none") - (set_attr "imm_disp" "false") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "0")]) @@ -17741,12 +17157,9 @@ (clobber (reg:CC FLAGS_REG))] "" "sbb{<imodesuffix>}\t%0, %0" - [(set_attr "type" "alu") - (set_attr "modrm_class" "op0") + [(set_attr "type" "alu1") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") - (set_attr "memory" "none") - (set_attr "imm_disp" "false") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "0")]) @@ -17757,12 +17170,9 @@ (clobber (reg:CC FLAGS_REG))] "" "sbb{<imodesuffix>}\t%0, %0" - [(set_attr "type" "alu") - (set_attr "modrm_class" "op0") + [(set_attr "type" "alu1") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") - (set_attr "memory" "none") - (set_attr "imm_disp" "false") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "0")]) @@ -18308,28 +17718,6 @@ "* return output_probe_stack_range 
(operands[0], operands[2]);" [(set_attr "type" "multi")]) -/* Additional processing for builtin_setjmp. Store the shadow stack pointer - as a forth element in jmpbuf. */ -(define_expand "builtin_setjmp_setup" - [(match_operand 0 "address_operand")] - "TARGET_SHSTK" -{ - if (flag_cf_protection & CF_RETURN) - { - rtx mem, reg_ssp; - - mem = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], - 3 * GET_MODE_SIZE (Pmode))); - reg_ssp = gen_reg_rtx (Pmode); - emit_insn (gen_rtx_SET (reg_ssp, const0_rtx)); - emit_insn ((Pmode == SImode) - ? gen_rdsspsi (reg_ssp, reg_ssp) - : gen_rdsspdi (reg_ssp, reg_ssp)); - emit_move_insn (mem, reg_ssp); - } - DONE; -}) - (define_expand "builtin_setjmp_receiver" [(label_ref (match_operand 0))] "!TARGET_64BIT && flag_pic" @@ -18350,80 +17738,141 @@ DONE; }) -(define_expand "builtin_longjmp" - [(match_operand 0 "address_operand")] - "TARGET_SHSTK" -{ - rtx fp, lab, stack; - rtx jump, label, reg_adj, reg_ssp, reg_minus, mem_buf, tmp, clob; - machine_mode sa_mode = STACK_SAVEAREA_MODE (SAVE_NONLOCAL); - - /* Adjust the shadow stack pointer (ssp) to the value saved in the - jmp_buf. The saving was done in the builtin_setjmp_setup. */ - if (flag_cf_protection & CF_RETURN) - { - /* Get current shadow stack pointer. The code below will check if - SHSTK feature is enabled. If it's not enabled RDSSP instruction +(define_expand "save_stack_nonlocal" + [(set (match_operand 0 "memory_operand") + (match_operand 1 "register_operand"))] + "" +{ + rtx stack_slot; + if ((flag_cf_protection & CF_RETURN)) + { + /* Copy shadow stack pointer to the first slot and stack ppointer + to the second slot. */ + rtx ssp_slot = adjust_address (operands[0], word_mode, 0); + stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD); + rtx ssp = gen_reg_rtx (word_mode); + emit_insn ((word_mode == SImode) + ? 
gen_rdsspsi (ssp) + : gen_rdsspdi (ssp)); + emit_move_insn (ssp_slot, ssp); + } + else + stack_slot = adjust_address (operands[0], Pmode, 0); + emit_move_insn (stack_slot, operands[1]); + DONE; +}) + +(define_expand "restore_stack_nonlocal" + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "memory_operand" ""))] + "" +{ + rtx stack_slot; + if ((flag_cf_protection & CF_RETURN)) + { + /* Restore shadow stack pointer from the first slot and stack + pointer from the second slot. */ + rtx ssp_slot = adjust_address (operands[1], word_mode, 0); + stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD); + + rtx flags, jump, noadj_label, inc_label, loop_label; + rtx reg_adj, reg_ssp, tmp, clob; + + /* Get the current shadow stack pointer. The code below will check if + SHSTK feature is enabled. If it is not enabled the RDSSP instruction is a NOP. */ - reg_ssp = gen_reg_rtx (Pmode); + reg_ssp = gen_reg_rtx (word_mode); emit_insn (gen_rtx_SET (reg_ssp, const0_rtx)); - emit_insn ((Pmode == SImode) - ? gen_rdsspsi (reg_ssp, reg_ssp) - : gen_rdsspdi (reg_ssp, reg_ssp)); - mem_buf = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], - 3 * GET_MODE_SIZE (Pmode))), + emit_insn ((word_mode == SImode) + ? gen_rdsspsi (reg_ssp) + : gen_rdsspdi (reg_ssp)); /* Compare through subtraction the saved and the current ssp to decide if ssp has to be adjusted. */ - reg_minus = gen_reg_rtx (Pmode); - tmp = gen_rtx_SET (reg_minus, gen_rtx_MINUS (Pmode, reg_ssp, mem_buf)); + tmp = gen_rtx_SET (reg_ssp, gen_rtx_MINUS (word_mode, reg_ssp, + ssp_slot)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); emit_insn (tmp); - /* Jump over adjustment code. */ - label = gen_label_rtx (); - tmp = gen_rtx_REG (CCmode, FLAGS_REG); - tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + /* Compare and jump over adjustment code. 
*/ + noadj_label = gen_label_rtx (); + flags = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx); tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), + gen_rtx_LABEL_REF (VOIDmode, noadj_label), pc_rtx); jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - JUMP_LABEL (jump) = label; - - /* Adjust the ssp. */ - reg_adj = gen_reg_rtx (Pmode); + JUMP_LABEL (jump) = noadj_label; + + /* Compute the number of frames to adjust. */ + reg_adj = gen_lowpart (ptr_mode, reg_ssp); tmp = gen_rtx_SET (reg_adj, - gen_rtx_LSHIFTRT (Pmode, negate_rtx (Pmode, reg_minus), - GEN_INT (3))); + gen_rtx_LSHIFTRT (ptr_mode, + negate_rtx (ptr_mode, reg_adj), + GEN_INT ((word_mode == SImode) + ? 2 + : 3))); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); emit_insn (tmp); - emit_insn ((Pmode == SImode) - ? gen_incsspsi (reg_adj) - : gen_incsspdi (reg_adj)); - - emit_label (label); - LABEL_NUSES (label) = 1; - } - - /* This code is the same as in expand_builtin_longjmp. */ - fp = gen_rtx_MEM (Pmode, operands[0]); - lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], - GET_MODE_SIZE (Pmode))); - stack = gen_rtx_MEM (sa_mode, plus_constant (Pmode, operands[0], - 2 * GET_MODE_SIZE (Pmode))); - lab = copy_to_reg (lab); - - emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); - emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); - - emit_move_insn (hard_frame_pointer_rtx, fp); - emit_stack_restore (SAVE_NONLOCAL, stack); - - emit_use (hard_frame_pointer_rtx); - emit_use (stack_pointer_rtx); - emit_indirect_jump (lab); + + /* Check if number of frames <= 255 so no loop is needed. 
*/ + tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255)); + flags = gen_rtx_REG (CCmode, FLAGS_REG); + emit_insn (gen_rtx_SET (flags, tmp)); + + inc_label = gen_label_rtx (); + tmp = gen_rtx_LEU (VOIDmode, flags, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, inc_label), + pc_rtx); + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + JUMP_LABEL (jump) = inc_label; + + rtx reg_255 = gen_reg_rtx (word_mode); + emit_move_insn (reg_255, GEN_INT (255)); + + /* Adjust the ssp in a loop. */ + loop_label = gen_label_rtx (); + emit_label (loop_label); + LABEL_NUSES (loop_label) = 1; + + emit_insn ((word_mode == SImode) + ? gen_incsspsi (reg_255) + : gen_incsspdi (reg_255)); + tmp = gen_rtx_SET (reg_adj, gen_rtx_MINUS (ptr_mode, + reg_adj, + GEN_INT (255))); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); + emit_insn (tmp); + + tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255)); + flags = gen_rtx_REG (CCmode, FLAGS_REG); + emit_insn (gen_rtx_SET (flags, tmp)); + + /* Jump to the loop label. */ + tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, loop_label), + pc_rtx); + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + JUMP_LABEL (jump) = loop_label; + + emit_label (inc_label); + LABEL_NUSES (inc_label) = 1; + emit_insn ((word_mode == SImode) + ? gen_incsspsi (reg_ssp) + : gen_incsspdi (reg_ssp)); + + emit_label (noadj_label); + LABEL_NUSES (noadj_label) = 1; + } + else + stack_slot = adjust_address (operands[1], Pmode, 0); + emit_move_insn (operands[0], stack_slot); + DONE; }) @@ -18855,6 +18304,37 @@ const0_rtx); }) +;; Likewise for cmpelim optimized pattern. 
+(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 3 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 2 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_dup 0) (match_dup 3))]) + (set (match_dup 1) (match_dup 0))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && ix86_match_ccmode (peep2_next_insn (1), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 1) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0)); + operands[5] + = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + copy_rtx (operands[1]), operands[2]); + operands[6] + = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -18898,7 +18378,7 @@ (set (match_dup 1) (match_dup 0)) (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) - && GET_CODE (operands[2]) != MINUS + && COMMUTATIVE_ARITH_P (operands[2]) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && ix86_match_ccmode (peep2_next_insn (2), @@ -18918,6 +18398,34 @@ const0_rtx); }) +;; Likewise for cmpelim optimized pattern. 
+(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")]) + (const_int 0))) + (set (match_dup 0) (match_dup 2))]) + (set (match_dup 1) (match_dup 0))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && COMMUTATIVE_ARITH_P (operands[2]) + && peep2_reg_dead_p (2, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && ix86_match_ccmode (peep2_next_insn (0), + GET_CODE (operands[2]) == PLUS + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 3) (match_dup 5)) + (set (match_dup 1) (match_dup 4))])] +{ + operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[4] + = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[1]), operands[0]); + operands[5] + = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -19836,7 +19344,6 @@ (define_insn "fnstenv" [(set (match_operand:BLK 0 "memory_operand" "=m") (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV)) - (clobber (reg:HI FPCR_REG)) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) (clobber (reg:XF ST2_REG)) @@ -19855,8 +19362,6 @@ (define_insn "fldenv" [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")] UNSPECV_FLDENV) - (clobber (reg:CCFP FPSR_REG)) - (clobber (reg:HI FPCR_REG)) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) (clobber (reg:XF ST2_REG)) @@ -20050,18 +19555,16 @@ ;; CET instructions (define_insn "rdssp<mode>" [(set (match_operand:SWI48x 0 "register_operand" "=r") - (unspec_volatile:SWI48x - [(match_operand:SWI48x 1 "register_operand" "0")] - UNSPECV_NOP_RDSSP))] - "TARGET_SHSTK" - "rdssp<mskmodesuffix>\t%0" - [(set_attr "length" "4") + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_NOP_RDSSP))] + 
"TARGET_SHSTK || (flag_cf_protection & CF_RETURN)" + "xor{l}\t%k0, %k0\n\trdssp<mskmodesuffix>\t%0" + [(set_attr "length" "6") (set_attr "type" "other")]) (define_insn "incssp<mode>" [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")] - UNSPECV_INCSSP)] - "TARGET_SHSTK" + UNSPECV_INCSSP)] + "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)" "incssp<mskmodesuffix>\t%0" [(set_attr "length" "4") (set_attr "type" "other")]) @@ -20073,9 +19576,14 @@ [(set_attr "length" "5") (set_attr "type" "other")]) -(define_insn "rstorssp" - [(unspec_volatile [(match_operand 0 "memory_operand" "m")] - UNSPECV_RSTORSSP)] +(define_expand "rstorssp" + [(unspec_volatile [(match_operand 0 "memory_operand")] + UNSPECV_RSTORSSP)] + "TARGET_SHSTK") + +(define_insn "*rstorssp<mode>" + [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")] + UNSPECV_RSTORSSP)] "TARGET_SHSTK" "rstorssp\t%0" [(set_attr "length" "5") @@ -20084,7 +19592,7 @@ (define_insn "wrss<mode>" [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r") (match_operand:SWI48x 1 "memory_operand" "m")] - UNSPECV_WRSS)] + UNSPECV_WRSS)] "TARGET_SHSTK" "wrss<mskmodesuffix>\t%0, %1" [(set_attr "length" "3") @@ -20093,7 +19601,7 @@ (define_insn "wruss<mode>" [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r") (match_operand:SWI48x 1 "memory_operand" "m")] - UNSPECV_WRUSS)] + UNSPECV_WRUSS)] "TARGET_SHSTK" "wruss<mskmodesuffix>\t%0, %1" [(set_attr "length" "4") @@ -20106,9 +19614,14 @@ [(set_attr "length" "4") (set_attr "type" "other")]) -(define_insn "clrssbsy" - [(unspec_volatile [(match_operand 0 "memory_operand" "m")] - UNSPECV_CLRSSBSY)] +(define_expand "clrssbsy" + [(unspec_volatile [(match_operand 0 "memory_operand")] + UNSPECV_CLRSSBSY)] + "TARGET_SHSTK") + +(define_insn "*clrssbsy<mode>" + [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")] + UNSPECV_CLRSSBSY)] "TARGET_SHSTK" "clrssbsy\t%0" [(set_attr "length" "4") @@ -20116,9 +19629,10 @@ (define_insn "nop_endbr" 
[(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)] - "TARGET_IBT" - "* -{ return (TARGET_64BIT)? \"endbr64\" : \"endbr32\"; }" + "(flag_cf_protection & CF_BRANCH)" +{ + return TARGET_64BIT ? "endbr64" : "endbr32"; +} [(set_attr "length" "4") (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) @@ -20247,161 +19761,6 @@ [(set_attr "length" "3") (set_attr "memory" "unknown")]) -;; MPX instructions - -(define_expand "<mode>_mk" - [(set (match_operand:BND 0 "register_operand") - (unspec:BND - [(mem:<bnd_ptr> - (match_par_dup 3 - [(match_operand:<bnd_ptr> 1 "register_operand") - (match_operand:<bnd_ptr> 2 "address_mpx_no_base_operand")]))] - UNSPEC_BNDMK))] - "TARGET_MPX" -{ - operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], - operands[2]), - UNSPEC_BNDMK_ADDR); -}) - -(define_insn "*<mode>_mk" - [(set (match_operand:BND 0 "register_operand" "=w") - (unspec:BND - [(match_operator:<bnd_ptr> 3 "bnd_mem_operator" - [(unspec:<bnd_ptr> - [(match_operand:<bnd_ptr> 1 "register_operand" "r") - (match_operand:<bnd_ptr> 2 "address_mpx_no_base_operand" "Tb")] - UNSPEC_BNDMK_ADDR)])] - UNSPEC_BNDMK))] - "TARGET_MPX" - "bndmk\t{%3, %0|%0, %3}" - [(set_attr "type" "mpxmk")]) - -(define_expand "mov<mode>" - [(set (match_operand:BND 0 "general_operand") - (match_operand:BND 1 "general_operand"))] - "TARGET_MPX" - "ix86_expand_move (<MODE>mode, operands); DONE;") - -(define_insn "*mov<mode>_internal_mpx" - [(set (match_operand:BND 0 "nonimmediate_operand" "=w,m") - (match_operand:BND 1 "general_operand" "wm,w"))] - "TARGET_MPX" - "bndmov\t{%1, %0|%0, %1}" - [(set_attr "type" "mpxmov")]) - -(define_expand "<mode>_<bndcheck>" - [(parallel - [(unspec - [(match_operand:BND 0 "register_operand") - (match_operand:<bnd_ptr> 1 "address_no_seg_operand")] BNDCHECK) - (set (match_dup 2) - (unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))])] - "TARGET_MPX" -{ - operands[2] = gen_rtx_MEM (BLKmode, operands[1]); - MEM_VOLATILE_P (operands[2]) = 1; -}) - -(define_insn 
"*<mode>_<bndcheck>" - [(unspec - [(match_operand:BND 0 "register_operand" "w") - (match_operand:<bnd_ptr> 1 "address_no_seg_operand" "Ts")] BNDCHECK) - (set (match_operand:BLK 2 "bnd_mem_operator") - (unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))] - "TARGET_MPX" - "bnd<bndcheck>\t{%a1, %0|%0, %a1}" - [(set_attr "type" "mpxchk")]) - -(define_expand "<mode>_ldx" - [(parallel - [(set (match_operand:BND 0 "register_operand") - (unspec:BND - [(mem:<bnd_ptr> - (match_par_dup 3 - [(match_operand:<bnd_ptr> 1 "address_mpx_no_index_operand") - (match_operand:<bnd_ptr> 2 "register_operand")]))] - UNSPEC_BNDLDX)) - (use (mem:BLK (match_dup 1)))])] - "TARGET_MPX" -{ - /* Avoid registers which cannot be used as index. */ - if (!index_register_operand (operands[2], Pmode)) - operands[2] = copy_addr_to_reg (operands[2]); - - operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], - operands[2]), - UNSPEC_BNDLDX_ADDR); -}) - -(define_insn "*<mode>_ldx" - [(set (match_operand:BND 0 "register_operand" "=w") - (unspec:BND - [(match_operator:<bnd_ptr> 3 "bnd_mem_operator" - [(unspec:<bnd_ptr> - [(match_operand:<bnd_ptr> 1 "address_mpx_no_index_operand" "Ti") - (match_operand:<bnd_ptr> 2 "register_operand" "l")] - UNSPEC_BNDLDX_ADDR)])] - UNSPEC_BNDLDX)) - (use (mem:BLK (match_dup 1)))] - "TARGET_MPX" - "bndldx\t{%3, %0|%0, %3}" - [(set_attr "type" "mpxld")]) - -(define_expand "<mode>_stx" - [(parallel - [(unspec - [(mem:<bnd_ptr> - (match_par_dup 3 - [(match_operand:<bnd_ptr> 0 "address_mpx_no_index_operand") - (match_operand:<bnd_ptr> 1 "register_operand")])) - (match_operand:BND 2 "register_operand")] - UNSPEC_BNDSTX) - (set (match_dup 4) - (unspec:BLK [(match_dup 4)] UNSPEC_MPX_FENCE))])] - "TARGET_MPX" -{ - /* Avoid registers which cannot be used as index. 
*/ - if (!index_register_operand (operands[1], Pmode)) - operands[1] = copy_addr_to_reg (operands[1]); - - operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[0], - operands[1]), - UNSPEC_BNDLDX_ADDR); - operands[4] = gen_rtx_MEM (BLKmode, operands[0]); - MEM_VOLATILE_P (operands[4]) = 1; -}) - -(define_insn "*<mode>_stx" - [(unspec - [(match_operator:<bnd_ptr> 3 "bnd_mem_operator" - [(unspec:<bnd_ptr> - [(match_operand:<bnd_ptr> 0 "address_mpx_no_index_operand" "Ti") - (match_operand:<bnd_ptr> 1 "register_operand" "l")] - UNSPEC_BNDLDX_ADDR)]) - (match_operand:BND 2 "register_operand" "w")] - UNSPEC_BNDSTX) - (set (match_operand:BLK 4 "bnd_mem_operator") - (unspec:BLK [(match_dup 4)] UNSPEC_MPX_FENCE))] - "TARGET_MPX" - "bndstx\t{%2, %3|%3, %2}" - [(set_attr "type" "mpxst")]) - -(define_insn "move_size_reloc_<mode>" - [(set (match_operand:SWI48 0 "register_operand" "=r") - (unspec:SWI48 - [(match_operand:SWI48 1 "symbol_operand")] - UNSPEC_SIZEOF))] - "TARGET_MPX" -{ - if (x86_64_immediate_size_operand (operands[1], VOIDmode)) - return "mov{l}\t{%1@SIZE, %k0|%k0, %1@SIZE}"; - else - return "movabs{q}\t{%1@SIZE, %0|%0, %1@SIZE}"; -} - [(set_attr "type" "imov") - (set_attr "mode" "<MODE>")]) - ;; RDPKRU and WRPKRU (define_expand "rdpkru" @@ -20447,10 +19806,112 @@ (define_insn "rdpid" [(set (match_operand:SI 0 "register_operand" "=r") (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))] - "TARGET_RDPID" + "!TARGET_64BIT && TARGET_RDPID" + "rdpid\t%0" + [(set_attr "type" "other")]) + +(define_insn "rdpid_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))] + "TARGET_64BIT && TARGET_RDPID" "rdpid\t%0" [(set_attr "type" "other")]) +;; Intrinsics for > i486 + +(define_insn "wbinvd" + [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)] + "" + "wbinvd" + [(set_attr "type" "other")]) + +(define_insn "wbnoinvd" + [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)] + "TARGET_WBNOINVD" + "wbnoinvd" + 
[(set_attr "type" "other")]) + +;; MOVDIRI and MOVDIR64B + +(define_insn "movdiri<mode>" + [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m") + (match_operand:SWI48 1 "register_operand" "r")] + UNSPECV_MOVDIRI)] + "TARGET_MOVDIRI" + "movdiri\t{%1, %0|%0, %1}" + [(set_attr "type" "other")]) + +(define_insn "movdir64b_<mode>" + [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r") + (match_operand:XI 1 "memory_operand")] + UNSPECV_MOVDIR64B)] + "TARGET_MOVDIR64B" + "movdir64b\t{%1, %0|%0, %1}" + [(set_attr "type" "other")]) + +;; WAITPKG + +(define_insn "umwait" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "A")] + UNSPECV_UMWAIT))] + "!TARGET_64BIT && TARGET_WAITPKG" + "umwait\t%0" + [(set_attr "length" "3")]) + +(define_insn "umwait_rex64" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_UMWAIT))] + "TARGET_64BIT && TARGET_WAITPKG" + "umwait\t%0" + [(set_attr "length" "3")]) + +(define_insn "umonitor_<mode>" + [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] + UNSPECV_UMONITOR)] + "TARGET_WAITPKG" + "umonitor\t%0" + [(set (attr "length") + (symbol_ref ("(Pmode != word_mode) + 3")))]) + +(define_insn "tpause" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "A")] + UNSPECV_TPAUSE))] + "!TARGET_64BIT && TARGET_WAITPKG" + "tpause\t%0" + [(set_attr "length" "3")]) + +(define_insn "tpause_rex64" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_TPAUSE))] + "TARGET_64BIT && TARGET_WAITPKG" + "tpause\t%0" + [(set_attr "length" "3")]) + +(define_insn "cldemote" 
+ [(unspec_volatile[(match_operand 0 "address_operand" "p")] + UNSPECV_CLDEMOTE)] + "TARGET_CLDEMOTE" + "cldemote\t%a0" + [(set_attr "type" "other") + (set_attr "memory" "unknown")]) + +(define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] + "" + "lfence" + [(set_attr "type" "other") + (set_attr "length" "3")]) + (include "mmx.md") (include "sse.md") (include "sync.md")