diff gcc/config/i386/i386.md @ 132:d34655255c78

update gcc-8.2
author mir3636
date Thu, 25 Oct 2018 10:21:07 +0900
parents ab0bcb71f44d 84e7813d76e9
children 351920fa3827
line wrap: on
line diff
--- a/gcc/config/i386/i386.md	Thu Oct 25 08:08:40 2018 +0900
+++ b/gcc/config/i386/i386.md	Thu Oct 25 10:21:07 2018 +0900
@@ -1,5 +1,5 @@
 ;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988-2017 Free Software Foundation, Inc.
+;; Copyright (C) 1988-2018 Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
 ;;
@@ -62,7 +62,7 @@
 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
-;; ! -- print MPX or NOTRACK prefix for jxx/call/ret instructions if required.
+;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
 
 (define_c_enum "unspec" [
   ;; Relocation specifiers
@@ -99,9 +99,9 @@
   UNSPEC_SCAS
   UNSPEC_FNSTSW
   UNSPEC_SAHF
+  UNSPEC_NOTRAP
   UNSPEC_PARITY
   UNSPEC_FSTCW
-  UNSPEC_FLDCW
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
@@ -143,7 +143,6 @@
   UNSPEC_FRNDINT_FLOOR
   UNSPEC_FRNDINT_CEIL
   UNSPEC_FRNDINT_TRUNC
-  UNSPEC_FRNDINT_MASK_PM
   UNSPEC_FIST_FLOOR
   UNSPEC_FIST_CEIL
 
@@ -183,16 +182,6 @@
   UNSPEC_PDEP
   UNSPEC_PEXT
 
-  UNSPEC_BNDMK
-  UNSPEC_BNDMK_ADDR
-  UNSPEC_BNDSTX
-  UNSPEC_BNDLDX
-  UNSPEC_BNDLDX_ADDR
-  UNSPEC_BNDCL
-  UNSPEC_BNDCU
-  UNSPEC_BNDCN
-  UNSPEC_MPX_FENCE
-
   ;; IRET support
   UNSPEC_INTERRUPT_RETURN
 ])
@@ -236,6 +225,8 @@
   UNSPECV_XSAVEC64
   UNSPECV_XGETBV
   UNSPECV_XSETBV
+  UNSPECV_WBINVD
+  UNSPECV_WBNOINVD
 
   ;; For atomic compound assignments.
   UNSPECV_FNSTENV
@@ -286,6 +277,21 @@
   UNSPECV_WRUSS
   UNSPECV_SETSSBSY
   UNSPECV_CLRSSBSY
+
+  ;; For MOVDIRI and MOVDIR64B support
+  UNSPECV_MOVDIRI
+  UNSPECV_MOVDIR64B
+
+  ;; For WAITPKG support
+  UNSPECV_UMWAIT
+  UNSPECV_UMONITOR
+  UNSPECV_TPAUSE
+
+  ;; For CLDEMOTE support
+  UNSPECV_CLDEMOTE
+
+  ;; For Speculation Barrier support
+  UNSPECV_SPECULATION_BARRIER
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -352,69 +358,64 @@
    (ARGP_REG			16)
    (FLAGS_REG			17)
    (FPSR_REG			18)
-   (FPCR_REG			19)
-   (FRAME_REG			20)
-   (XMM0_REG			21)
-   (XMM1_REG			22)
-   (XMM2_REG			23)
-   (XMM3_REG			24)
-   (XMM4_REG			25)
-   (XMM5_REG			26)
-   (XMM6_REG			27)
-   (XMM7_REG			28)
-   (MM0_REG			29)
-   (MM1_REG			30)
-   (MM2_REG			31)
-   (MM3_REG			32)
-   (MM4_REG			33)
-   (MM5_REG			34)
-   (MM6_REG			35)
-   (MM7_REG			36)
-   (R8_REG			37)
-   (R9_REG			38)
-   (R10_REG			39)
-   (R11_REG			40)
-   (R12_REG			41)
-   (R13_REG			42)
-   (R14_REG			43)
-   (R15_REG			44)
-   (XMM8_REG			45)
-   (XMM9_REG			46)
-   (XMM10_REG			47)
-   (XMM11_REG			48)
-   (XMM12_REG			49)
-   (XMM13_REG			50)
-   (XMM14_REG			51)
-   (XMM15_REG			52)
-   (XMM16_REG			53)
-   (XMM17_REG			54)
-   (XMM18_REG			55)
-   (XMM19_REG			56)
-   (XMM20_REG			57)
-   (XMM21_REG			58)
-   (XMM22_REG			59)
-   (XMM23_REG			60)
-   (XMM24_REG			61)
-   (XMM25_REG			62)
-   (XMM26_REG			63)
-   (XMM27_REG			64)
-   (XMM28_REG			65)
-   (XMM29_REG			66)
-   (XMM30_REG			67)
-   (XMM31_REG			68)
-   (MASK0_REG			69)
-   (MASK1_REG			70)
-   (MASK2_REG			71)
-   (MASK3_REG			72)
-   (MASK4_REG			73)
-   (MASK5_REG			74)
-   (MASK6_REG			75)
-   (MASK7_REG			76)
-   (BND0_REG			77)
-   (BND1_REG			78)
-   (BND2_REG			79)
-   (BND3_REG			80)
-   (FIRST_PSEUDO_REG		81)
+   (FRAME_REG			19)
+   (XMM0_REG			20)
+   (XMM1_REG			21)
+   (XMM2_REG			22)
+   (XMM3_REG			23)
+   (XMM4_REG			24)
+   (XMM5_REG			25)
+   (XMM6_REG			26)
+   (XMM7_REG			27)
+   (MM0_REG			28)
+   (MM1_REG			29)
+   (MM2_REG			30)
+   (MM3_REG			31)
+   (MM4_REG			32)
+   (MM5_REG			33)
+   (MM6_REG			34)
+   (MM7_REG			35)
+   (R8_REG			36)
+   (R9_REG			37)
+   (R10_REG			38)
+   (R11_REG			39)
+   (R12_REG			40)
+   (R13_REG			41)
+   (R14_REG			42)
+   (R15_REG			43)
+   (XMM8_REG			44)
+   (XMM9_REG			45)
+   (XMM10_REG			46)
+   (XMM11_REG			47)
+   (XMM12_REG			48)
+   (XMM13_REG			49)
+   (XMM14_REG			50)
+   (XMM15_REG			51)
+   (XMM16_REG			52)
+   (XMM17_REG			53)
+   (XMM18_REG			54)
+   (XMM19_REG			55)
+   (XMM20_REG			56)
+   (XMM21_REG			57)
+   (XMM22_REG			58)
+   (XMM23_REG			59)
+   (XMM24_REG			60)
+   (XMM25_REG			61)
+   (XMM26_REG			62)
+   (XMM27_REG			63)
+   (XMM28_REG			64)
+   (XMM29_REG			65)
+   (XMM30_REG			66)
+   (XMM31_REG			67)
+   (MASK0_REG			68)
+   (MASK1_REG			69)
+   (MASK2_REG			70)
+   (MASK3_REG			71)
+   (MASK4_REG			72)
+   (MASK5_REG			73)
+   (MASK6_REG			74)
+   (MASK7_REG			75)
+   (FIRST_PSEUDO_REG		76)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -428,7 +429,7 @@
 
 ;; Processor type.
 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
-		    atom,slm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
+		    atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
 		    bdver4,btver2,znver1"
   (const (symbol_ref "ix86_schedule")))
 
@@ -449,8 +450,7 @@
    ssecvt,ssecvt1,sseicvt,sseins,
    sseshuf,sseshuf1,ssemuladd,sse4arg,
    lwp,mskmov,msklog,
-   mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft,
-   mpxmov,mpxmk,mpxchk,mpxld,mpxst"
+   mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
 ;; Main data type used by the insn
@@ -479,8 +479,7 @@
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
-			  bitmanip,imulx,msklog,mskmov,mpxmk,mpxmov,mpxchk,
-			  mpxld,mpxst")
+			  bitmanip,imulx,msklog,mskmov")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
@@ -535,17 +534,13 @@
 	   (const_int 0)
 	 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
 	   (const_int 1)
-	 (and (eq_attr "type" "ibr,call,callv")
-	      (match_test "ix86_bnd_prefixed_insn_p (insn)"))
-	   (const_int 1)
 	]
 	(const_int 0)))
 
 ;; Set when 0f opcode prefix is used.
 (define_attr "prefix_0f" ""
   (if_then_else
-    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov,
-			  mpxmk,mpxmov,mpxchk,mpxld,mpxst")
+    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
 	 (eq_attr "unit" "sse,mmx"))
     (const_int 1)
     (const_int 0)))
@@ -581,9 +576,6 @@
 	]
 	(const_int 0)))
 
-;; Set when BND opcode prefix may be used.
-(define_attr "maybe_prefix_bnd" "" (const_int 0))
-
 ;; Prefix used: original, VEX or maybe VEX.
 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
   (cond [(eq_attr "mode" "OI,V8SF,V4DF")
@@ -651,19 +643,6 @@
 	 ]
 	 (const_int 1)))
 
-(define_attr "modrm_class" "none,incdec,op0,op01,op02,pushpop,unknown"
-  (cond [(eq_attr "modrm" "0")
-	   (const_string "none")
-	 (eq_attr "type" "alu,imul,ishift")
-	   (const_string "op02")
-	 (eq_attr "type" "imov,imovx,lea,alu1,icmp")
-	   (const_string "op01")
-	 (eq_attr "type" "incdec")
-	   (const_string "incdec")
-	 (eq_attr "type" "push,pop")
-	   (const_string "pushpop")]
-	 (const_string "unknown")))
-
 ;; The (bounding maximum) length of an instruction in bytes.
 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
 ;; Later we may want to split them and compute proper length as for
@@ -710,16 +689,12 @@
 (define_attr "memory" "none,load,store,both,unknown"
   (cond [(eq_attr "type" "other,multi,str,lwp")
 	   (const_string "unknown")
-	 (eq_attr "type" "lea,fcmov,fpspc,mpxmk,mpxchk")
+	 (eq_attr "type" "lea,fcmov,fpspc")
 	   (const_string "none")
 	 (eq_attr "type" "fistp,leave")
 	   (const_string "both")
 	 (eq_attr "type" "frndint")
 	   (const_string "load")
-	 (eq_attr "type" "mpxld")
-	   (const_string "load")
-	 (eq_attr "type" "mpxst")
-	   (const_string "store")
 	 (eq_attr "type" "push")
 	   (if_then_else (match_operand 1 "memory_operand")
 	     (const_string "both")
@@ -749,7 +724,7 @@
 	   (if_then_else (match_operand 1 "constant_call_address_operand")
 	     (const_string "none")
 	     (const_string "load"))
-	 (and (eq_attr "type" "alu1,negnot,ishift1,sselog1,sseshuf1")
+	 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
 	      (match_operand 1 "memory_operand"))
 	   (const_string "both")
 	 (and (match_operand 0 "memory_operand")
@@ -760,12 +735,12 @@
 	 (match_operand 1 "memory_operand")
 	   (const_string "load")
 	 (and (eq_attr "type"
-		 "!alu1,negnot,ishift1,
+		 "!alu1,negnot,ishift1,rotate1,
 		   imov,imovx,icmp,test,bitmanip,
 		   fmov,fcmp,fsgn,
 		   sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
 		   sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
-		   mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog,mpxmov")
+		   mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
 	      (match_operand 2 "memory_operand"))
 	   (const_string "load")
 	 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
@@ -797,7 +772,7 @@
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any"
   (const_string "any"))
 
 ;; Define attribute to classify add/sub insns that consumes carry flag (CF)
@@ -807,7 +782,7 @@
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
+(define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
 		    sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
 		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 		    avx512bw,noavx512bw,avx512dq,noavx512dq,
@@ -816,6 +791,8 @@
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
+	 (eq_attr "isa" "x64_sse2")
+	   (symbol_ref "TARGET_64BIT && TARGET_SSE2")
 	 (eq_attr "isa" "x64_sse4")
 	   (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
 	 (eq_attr "isa" "x64_sse4_noavx")
@@ -943,7 +920,7 @@
 (define_code_iterator absneg [abs neg])
 
 ;; Base name for x87 insn mnemonic.
-(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
+(define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
 
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
@@ -964,10 +941,14 @@
 ;; Used in signed and unsigned fix.
 (define_code_iterator any_fix [fix unsigned_fix])
 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
+(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
+(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
 
 ;; Used in signed and unsigned float.
 (define_code_iterator any_float [float unsigned_float])
 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
+(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
+(define_code_attr floatprefix [(float "s") (unsigned_float "u")])
 
 ;; All integer modes.
 (define_mode_iterator SWI1248x [QI HI SI DI])
@@ -1049,21 +1030,6 @@
 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
 			    (DI "TARGET_64BIT")])
 
-;; Bound modes.
-(define_mode_iterator BND [(BND32 "!TARGET_LP64")
-			   (BND64 "TARGET_LP64")])
-
-;; Pointer mode corresponding to bound mode.
-(define_mode_attr bnd_ptr [(BND32 "SI") (BND64 "DI")])
-
-;; MPX check types
-(define_int_iterator BNDCHECK [UNSPEC_BNDCL UNSPEC_BNDCU UNSPEC_BNDCN])
-
-;; Check name
-(define_int_attr bndcheck [(UNSPEC_BNDCL "cl")
-			   (UNSPEC_BNDCU "cu")
-			   (UNSPEC_BNDCN "cn")])
-
 ;; Instruction suffix for integer modes.
 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
 
@@ -1195,6 +1161,7 @@
 
 ;; Instruction suffix for REX 64bit operators.
 (define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+(define_mode_attr rex64namesuffix [(SI "") (DI "q")])
 
 ;; This mode iterator allows :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
@@ -1223,6 +1190,7 @@
 (include "geode.md")
 (include "atom.md")
 (include "slm.md")
+(include "glm.md")
 (include "core2.md")
 (include "haswell.md")
 
@@ -1274,6 +1242,25 @@
 	(compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
 		    (match_operand:SWI48 1 "<general_operand>")))])
 
+(define_mode_iterator SWI1248_AVX512BWDQ2_64
+  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
+   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
+
+(define_insn "*cmp<mode>_ccz_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SWI1248_AVX512BWDQ2_64 0
+			"nonimmediate_operand" "<r>,?m<r>,$k")
+		 (match_operand:SWI1248_AVX512BWDQ2_64 1 "const0_operand")))]
+  "ix86_match_ccmode (insn, CCZmode)"
+  "@
+   test{<imodesuffix>}\t%0, %0
+   cmp{<imodesuffix>}\t{%1, %0|%0, %1}
+   ktest<mskmodesuffix>\t%0, %0"
+  [(set_attr "type" "test,icmp,msklog")
+   (set_attr "length_immediate" "0,1,*")
+   (set_attr "prefix" "*,*,vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*cmp<mode>_ccno_1"
   [(set (reg FLAGS_REG)
 	(compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
@@ -1284,7 +1271,6 @@
    cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "test,icmp")
    (set_attr "length_immediate" "0,1")
-   (set_attr "modrm_class" "op0,unknown")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*cmp<mode>_1"
@@ -1475,55 +1461,18 @@
   DONE;
 })
 
-
 ;; FP compares, step 1:
-;; Set the FP condition codes.
-;;
-;; CCFPmode	compare with exceptions
-;; CCFPUmode	compare with no exceptions
-
-;; We may not use "#" to split and emit these, since the REG_DEAD notes
-;; used to manage the reg stack popping would not be preserved.
-
-(define_insn "*cmp<mode>_0_i387"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-	(unspec:HI
-	  [(compare:CCFP
-	     (match_operand:X87MODEF 1 "register_operand" "f")
-	     (match_operand:X87MODEF 2 "const0_operand"))]
-	UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, false, false);"
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn_and_split "*cmp<mode>_0_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-	(compare:CCFP
-	  (match_operand:X87MODEF 1 "register_operand" "f")
-	  (match_operand:X87MODEF 2 "const0_operand")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(unspec:HI
-	  [(compare:CCFP (match_dup 1)(match_dup 2))]
-	UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
+;; Set the FP condition codes and move fpsr to ax.
+
+;; We may not use "#" to split and emit these
+;; due to reg-stack pops killing fpsr.
 
 (define_insn "*cmpxf_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(unspec:HI
 	  [(compare:CCFP
 	     (match_operand:XF 1 "register_operand" "f")
-	     (match_operand:XF 2 "register_operand" "f"))]
+	     (match_operand:XF 2 "reg_or_0_operand" "fC"))]
 	  UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, false, false);"
@@ -1531,32 +1480,12 @@
    (set_attr "unit" "i387")
    (set_attr "mode" "XF")])
 
-(define_insn_and_split "*cmpxf_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-	(compare:CCFP
-	  (match_operand:XF 1 "register_operand" "f")
-	  (match_operand:XF 2 "register_operand" "f")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(unspec:HI
-	  [(compare:CCFP (match_dup 1)(match_dup 2))]
-	UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "XF")])
-
 (define_insn "*cmp<mode>_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(unspec:HI
 	  [(compare:CCFP
 	     (match_operand:MODEF 1 "register_operand" "f")
-	     (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
+	     (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
 	  UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, false, false);"
@@ -1564,66 +1493,13 @@
    (set_attr "unit" "i387")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*cmp<mode>_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-	(compare:CCFP
-	  (match_operand:MODEF 1 "register_operand" "f")
-	  (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(unspec:HI
-	  [(compare:CCFP (match_dup 1)(match_dup 2))]
-	UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*cmpu<mode>_i387"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-	(unspec:HI
-	  [(compare:CCFPU
-	     (match_operand:X87MODEF 1 "register_operand" "f")
-	     (match_operand:X87MODEF 2 "register_operand" "f"))]
-	  UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, false, true);"
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn_and_split "*cmpu<mode>_cc_i387"
-  [(set (reg:CCFPU FLAGS_REG)
-	(compare:CCFPU
-	  (match_operand:X87MODEF 1 "register_operand" "f")
-	  (match_operand:X87MODEF 2 "register_operand" "f")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(unspec:HI
-	  [(compare:CCFPU (match_dup 1)(match_dup 2))]
-	UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(unspec:HI
 	  [(compare:CCFP
 	     (match_operand:X87MODEF 1 "register_operand" "f")
 	     (float:X87MODEF
-	       (match_operand:SWI24 2 "memory_operand" "m")))]
+	       (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
 	  UNSPEC_FNSTSW))]
   "TARGET_80387
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
@@ -1634,45 +1510,22 @@
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<SWI24:MODE>")])
 
-(define_insn_and_split "*cmp<X87MODEF:mode>_<SWI24:mode>_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-	(compare:CCFP
-	  (match_operand:X87MODEF 1 "register_operand" "f")
-	  (float:X87MODEF
-	    (match_operand:SWI24 2 "memory_operand" "m"))))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE
-   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
-       || optimize_function_for_size_p (cfun))"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
+(define_insn "*cmpu<mode>_i387"
+  [(set (match_operand:HI 0 "register_operand" "=a")
 	(unspec:HI
-	  [(compare:CCFP
-	     (match_dup 1)
-	     (float:X87MODEF (match_dup 2)))]
-	UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
+	  [(unspec:CCFP
+	     [(compare:CCFP
+		(match_operand:X87MODEF 1 "register_operand" "f")
+		(match_operand:X87MODEF 2 "register_operand" "f"))]
+	     UNSPEC_NOTRAP)]
+	  UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "* return output_fp_compare (insn, operands, false, true);"
   [(set_attr "type" "multi")
    (set_attr "unit" "i387")
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<SWI24:MODE>")])
-
-;; FP compares, step 2
-;; Move the fpsw to ax.
-
-(define_insn "x86_fnstsw_1"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "fnstsw\t%0"
-  [(set_attr "length" "2")
-   (set_attr "mode" "SI")
-   (set_attr "unit" "i387")])
-
-;; FP compares, step 3
+   (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2:
 ;; Get ax into flags, general case.
 
 (define_insn "x86_sahf_1"
@@ -1694,23 +1547,45 @@
    (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
-;; Pentium Pro can do steps 1 through 3 in one go.
+;; Pentium Pro can do both steps in one go.
 ;; (these instructions set flags directly)
 
-(define_mode_iterator FPCMP [CCFP CCFPU])
-(define_mode_attr unord [(CCFP "") (CCFPU "u")])
-
-(define_insn "*cmpi<FPCMP:unord><MODEF:mode>"
-  [(set (reg:FPCMP FLAGS_REG)
-	(compare:FPCMP
+(define_subst_attr "unord" "unord_subst" "" "u")
+(define_subst_attr "unordered" "unord_subst" "false" "true")
+
+(define_subst "unord_subst"
+  [(set (match_operand:CCFP 0)
+        (match_operand:CCFP 1))]
+  ""
+  [(set (match_dup 0)
+        (unspec:CCFP
+	  [(match_dup 1)]
+	  UNSPEC_NOTRAP))])
+
+(define_insn "*cmpi<unord>xf_i387"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
+	  (match_operand:XF 0 "register_operand" "f")
+	  (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387 && TARGET_CMOVE"
+  "* return output_fp_compare (insn, operands, true, <unordered>);"
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "znver1_decode" "double")])
+
+(define_insn "*cmpi<unord><MODEF:mode>"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
 	  (match_operand:MODEF 0 "register_operand" "f,v")
 	  (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
   "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_80387 && TARGET_CMOVE)"
   "@
-   * return output_fp_compare (insn, operands, true, \
-			       <FPCMP:MODE>mode == CCFPUmode);
-   %v<FPCMP:unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
+   * return output_fp_compare (insn, operands, true, <unordered>);
+   %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "fcmp,ssecomi")
    (set_attr "prefix" "orig,maybe_vex")
    (set_attr "mode" "<MODEF:MODE>")
@@ -1737,21 +1612,6 @@
 	 (eq_attr "alternative" "0")
 	 (symbol_ref "true")
 	 (symbol_ref "false"))))])
-
-(define_insn "*cmpi<unord>xf_i387"
-  [(set (reg:FPCMP FLAGS_REG)
-	(compare:FPCMP
-	  (match_operand:XF 0 "register_operand" "f")
-	  (match_operand:XF 1 "register_operand" "f")))]
-  "TARGET_80387 && TARGET_CMOVE"
-  "* return output_fp_compare (insn, operands, true,
-			       <MODE>mode == CCFPUmode);"
-  [(set_attr "type" "fcmp")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")
-   (set_attr "bdver1_decode" "double")
-   (set_attr "znver1_decode" "double")])
 
 ;; Push/pop instructions.
 
@@ -1989,7 +1849,6 @@
   "reload_completed"
   "xor{l}\t%k0, %k0"
   [(set_attr "type" "alu1")
-   (set_attr "modrm_class" "op0")
    (set_attr "mode" "SI")
    (set_attr "length_immediate" "0")])
 
@@ -2013,7 +1872,7 @@
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       if (misaligned_operand (operands[0], XImode)
@@ -2040,7 +1899,7 @@
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       if (misaligned_operand (operands[0], OImode)
@@ -2086,7 +1945,7 @@
 
 (define_insn "*movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
-	(match_operand:TI 1 "general_operand"	   "riFo,re,C,BC,vm,v,Ye,r"))]
+	(match_operand:TI 1 "general_operand"	   "riFo,re,C,BC,vm,v,Yd,r"))]
   "(TARGET_64BIT
     && !(MEM_P (operands[0]) && MEM_P (operands[1])))
    || (TARGET_SSE
@@ -2100,7 +1959,7 @@
       return "#";
 
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       /* TDmode values are passed as TImode on the stack.  Moving them
@@ -2166,12 +2025,19 @@
 	       (match_test "optimize_function_for_size_p (cfun)")
 		 (const_string "V4SF")
 	       ]
-	       (const_string "TI")))])
+	       (const_string "TI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "6")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "7")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+	   (symbol_ref "true")))])
 
 (define_split
   [(set (match_operand:TI 0 "sse_reg_operand")
         (match_operand:TI 1 "general_reg_operand"))]
-  "TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC
+  "TARGET_64BIT && TARGET_SSE4_1
    && reload_completed"
   [(set (match_dup 2)
   	(vec_merge:V2DI
@@ -2188,9 +2054,9 @@
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yd,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m")
 	(match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,v,*Ye,r   ,*Yj,r   ,*Yj ,*Yn ,*r,*km,*k,*k"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*km,*k,*k"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2212,7 +2078,7 @@
       return "movq\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
@@ -2223,10 +2089,13 @@
 	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
 	    return "%vmovd\t{%1, %0|%0, %1}";
 	  return "%vmovq\t{%1, %0|%0, %1}";
+
 	case MODE_TI:
+	  /* Handle the AVX512 register set.  */
+	  if (EXT_REX_SSE_REG_P (operands[0])
+	      || EXT_REX_SSE_REG_P (operands[1]))
+	    return "vmovdqa64\t{%1, %0|%0, %1}";
 	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
 
 	case MODE_V2SF:
 	  gcc_assert (!TARGET_AVX);
@@ -2265,8 +2134,12 @@
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,17,18")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "2,3,4,5,10,11,19,20,23,25")
+	    (eq_attr "alternative" "2,3,4,5,10,11,23,25")
 	      (const_string "x64")
+	    (eq_attr "alternative" "19,20")
+	      (const_string "x64_sse2")
+	    (eq_attr "alternative" "21,22")
+	      (const_string "sse2")
 	   ]
 	   (const_string "*")))
    (set (attr "type")
@@ -2318,7 +2191,7 @@
 	    (eq_attr "alternative" "12,13")
 	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
 			  (match_operand 1 "ext_sse_reg_operand"))
-		       (const_string "XI")
+		       (const_string "TI")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		       (const_string "V4SF")
@@ -2334,6 +2207,13 @@
 	      (const_string "V2SF")
 	   ]
 	   (const_string "DI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "10,17,19")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "11,18,20")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+	   (symbol_ref "true")))
    (set (attr "enabled")
      (cond [(eq_attr "alternative" "15")
               (if_then_else
@@ -2351,7 +2231,7 @@
 (define_split
   [(set (match_operand:<DWI> 0 "general_reg_operand")
         (match_operand:<DWI> 1 "sse_reg_operand"))]
-  "TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC
+  "TARGET_SSE4_1
    && reload_completed"
   [(set (match_dup 2)
   	(vec_select:DWIH
@@ -2375,7 +2255,7 @@
 (define_split
   [(set (match_operand:DI 0 "sse_reg_operand")
         (match_operand:DI 1 "general_reg_operand"))]
-  "!TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC
+  "!TARGET_64BIT && TARGET_SSE4_1
    && reload_completed"
   [(set (match_dup 2)
   	(vec_merge:V4SI
@@ -2414,15 +2294,15 @@
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-    "=r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?*Yi,*k,*k ,*rm")
+    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm")
 	(match_operand:SI 1 "general_operand"
-    "g ,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,*Yj,r   ,*r,*km,*k"))]
+    "g ,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,*v,r  ,*r,*km,*k"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_MSKMOV:
       return "kmovd\t{%1, %0|%0, %1}";
@@ -2477,7 +2357,12 @@
       gcc_unreachable ();
     }
 }
-  [(set (attr "type")
+  [(set (attr "isa")
+     (cond [(eq_attr "alternative" "12,13")
+	      (const_string "sse2")
+	   ]
+	   (const_string "*")))
+   (set (attr "type")
      (cond [(eq_attr "alternative" "2")
 	      (const_string "mmx")
 	    (eq_attr "alternative" "3,4,5,6,7")
@@ -2522,7 +2407,14 @@
 	         (not (match_test "TARGET_SSE2")))
 	      (const_string "SF")
 	   ]
-	   (const_string "SI")))])
+	   (const_string "SI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "6,12")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "7,13")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+	   (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m")
@@ -2841,7 +2733,6 @@
   "reload_completed"
   "xor{<imodesuffix>}\t%0, %0"
   [(set_attr "type" "alu1")
-   (set_attr "modrm_class" "op0")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
@@ -3293,7 +3184,7 @@
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       /* Handle misaligned load/store since we
@@ -3434,9 +3325,9 @@
 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-    "=Yf*f,m   ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi,r  ,o ,r  ,m")
+    "=Yf*f,m   ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r  ,o ,r  ,m")
 	(match_operand:DF 1 "general_operand"
-    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r ,roF,rF,rmF,rC"))]
+    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (lra_in_progress || reload_completed
        || !CONST_DOUBLE_P (operands[1])
@@ -3470,14 +3361,14 @@
 	return "mov{q}\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
 	{
 	case MODE_DF:
 	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	    return "vmovsd\t{%d1, %0|%0, %d1}";
 	  return "%vmovsd\t{%1, %0|%0, %1}";
 
 	case MODE_V4SF:
@@ -3512,10 +3403,12 @@
   [(set (attr "isa")
 	(cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
 		 (const_string "nox64")
-	       (eq_attr "alternative" "8,9,10,11,20,21,24,25")
+	       (eq_attr "alternative" "8,9,10,11,24,25")
 		 (const_string "x64")
 	       (eq_attr "alternative" "12,13,14,15")
 		 (const_string "sse2")
+	       (eq_attr "alternative" "20,21")
+		 (const_string "x64_sse2")
 	      ]
 	      (const_string "*")))
    (set (attr "type")
@@ -3616,7 +3509,12 @@
            (symbol_ref "true")))
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "3,4")
-              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
+              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
+	    (eq_attr "alternative" "20")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "21")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
            (symbol_ref "true")))
    (set (attr "enabled")
      (cond [(eq_attr "alternative" "22,23,24,25")
@@ -3638,9 +3536,9 @@
 
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
-	  "=Yf*f,m   ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym,r  ,m")
+	  "=Yf*f,m   ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r  ,m")
 	(match_operand:SF 1 "general_operand"
-	  "Yf*fm,Yf*f,G   ,rmF,rF,C,v,m,v,Yj,r  ,*y ,m  ,*y,*Yn,r   ,rmF,rF"))]
+	  "Yf*fm,Yf*f,G   ,rmF,rF,C,v,m,v,v ,r ,*y ,m  ,*y,*y,r  ,rmF,rF"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (lra_in_progress || reload_completed
        || !CONST_DOUBLE_P (operands[1])
@@ -3664,14 +3562,14 @@
       return "mov{l}\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands[1]);
+      return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
 	{
 	case MODE_SF:
 	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovss\t{%1, %0, %0|%0, %0, %1}";
+	    return "vmovss\t{%d1, %0|%0, %d1}";
 	  return "%vmovss\t{%1, %0|%0, %1}";
 
 	case MODE_V16SF:
@@ -3702,7 +3600,12 @@
       gcc_unreachable ();
     }
 }
-  [(set (attr "type")
+  [(set (attr "isa")
+     (cond [(eq_attr "alternative" "14,15")
+	      (const_string "sse2")
+	   ]
+	   (const_string "*")))
+   (set (attr "type")
 	(cond [(eq_attr "alternative" "0,1,2")
 		 (const_string "fmov")
 	       (eq_attr "alternative" "3,4,16,17")
@@ -3764,6 +3667,13 @@
 		       (const_string "SF"))
 	      ]
 	      (const_string "SF")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "9,14")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "10,15")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+           (symbol_ref "true")))
    (set (attr "enabled")
      (cond [(eq_attr "alternative" "16,17")
               (if_then_else
@@ -3777,7 +3687,7 @@
 
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
-	(match_operand 1 "nonimmediate_operand"))]
+	(match_operand 1 "memory_operand"))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
@@ -3789,7 +3699,7 @@
 
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
-	(float_extend (match_operand 1 "nonimmediate_operand")))]
+	(float_extend (match_operand 1 "memory_operand")))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
@@ -3815,7 +3725,7 @@
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
-(define_insn "swapxf"
+(define_insn "*swapxf"
   [(set (match_operand:XF 0 "register_operand" "+f")
 	(match_operand:XF 1 "register_operand" "+f"))
    (set (match_dup 1)
@@ -3829,22 +3739,8 @@
 }
   [(set_attr "type" "fxch")
    (set_attr "mode" "XF")])
-
-(define_insn "*swap<mode>"
-  [(set (match_operand:MODEF 0 "fp_register_operand" "+f")
-	(match_operand:MODEF 1 "fp_register_operand" "+f"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "TARGET_80387 || reload_completed"
-{
-  if (STACK_TOP_P (operands[0]))
-    return "fxch\t%1";
-  else
-    return "fxch\t%0";
-}
-  [(set_attr "type" "fxch")
-   (set_attr "mode" "<MODE>")])
 
+
 ;; Zero extension instructions
 
 (define_expand "zero_extendsidi2"
@@ -3853,10 +3749,10 @@
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-		"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?*Yi,*x,*x,*v,*r")
+		"=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r")
 	(zero_extend:DI
 	 (match_operand:SI 1 "x86_64_zext_operand"
-	        "0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,r   ,m ,*x,*v,*k")))]
+	        "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -3900,7 +3796,7 @@
 	      (const_string "nox64")
 	    (eq_attr "alternative" "3")
 	      (const_string "x64")
-	    (eq_attr "alternative" "9")
+	    (eq_attr "alternative" "7,8,9")
 	      (const_string "sse2")
 	    (eq_attr "alternative" "10")
 	      (const_string "sse4")
@@ -3946,7 +3842,14 @@
 	    (eq_attr "alternative" "8,10,11")
 	      (const_string "TI")
 	   ]
-	   (const_string "SI")))])
+	   (const_string "SI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "7")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "5,8")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+           (symbol_ref "true")))])
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand")
@@ -3972,15 +3875,6 @@
    (set (match_dup 4) (const_int 0))]
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
-(define_peephole2
-  [(set (match_operand:DI 0 "general_reg_operand")
-	(zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))
-   (set (match_operand:DI 2 "sse_reg_operand") (match_dup 0))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
-   && peep2_reg_dead_p (2, operands[0])"
-  [(set (match_dup 2)
-	(zero_extend:DI (match_dup 1)))])
-
 (define_mode_attr kmov_isa
   [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
 
@@ -4423,6 +4317,40 @@
     }
 })
 
+(define_insn "*extendsfdf2"
+  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+        (float_extend:DF
+	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "SF,XF,DF")
+   (set (attr "enabled")
+     (if_then_else
+       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
+       (if_then_else
+	 (eq_attr "alternative" "0,1")
+	 (symbol_ref "TARGET_MIX_SSE_I387")
+	 (symbol_ref "true"))
+       (if_then_else
+	 (eq_attr "alternative" "0,1")
+	 (symbol_ref "true")
+	 (symbol_ref "false"))))])
+
 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
    cvtss2sd:
       unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
@@ -4490,39 +4418,31 @@
    (set (match_dup 0) (float_extend:DF (match_dup 2)))]
   "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
 
-(define_insn "*extendsfdf2"
-  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+;; Break partial reg stall for cvtss2sd.  This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+  [(set (match_operand:DF 0 "sse_reg_operand")
         (float_extend:DF
-	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,ssecvt")
-   (set_attr "prefix" "orig,orig,maybe_vex")
-   (set_attr "mode" "SF,XF,DF")
-   (set (attr "enabled")
-     (if_then_else
-       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
-       (if_then_else
-	 (eq_attr "alternative" "0,1")
-	 (symbol_ref "TARGET_MIX_SSE_I387")
-	 (symbol_ref "true"))
-       (if_then_else
-	 (eq_attr "alternative" "0,1")
-	 (symbol_ref "true")
-	 (symbol_ref "false"))))])
+          (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && (!REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))
+   && (!EXT_REX_SSE_REG_P (operands[0])
+       || TARGET_AVX512VL)"
+  [(set (match_dup 0)
+        (vec_merge:V2DF
+	  (vec_duplicate:V2DF
+	    (float_extend:DF
+	      (match_dup 1)))
+	  (match_dup 0)
+          (const_int 1)))]
+{
+  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
+  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
 
 (define_expand "extend<mode>xf2"
   [(set (match_operand:XF 0 "nonimmediate_operand")
@@ -4561,23 +4481,43 @@
 
 ;; Conversion from DFmode to SFmode.
 
-(define_expand "truncdfsf2"
-  [(set (match_operand:SF 0 "nonimmediate_operand")
+(define_insn "truncdfsf2"
+  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v")
 	(float_truncate:SF
-	  (match_operand:DF 1 "nonimmediate_operand")))]
+	  (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))]
   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
-  if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
-    ;
-  else if (flag_unsafe_math_optimizations)
-    ;
-  else
-    {
-      rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP);
-      emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
-      DONE;
-    }
-})
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "mode" "SF")
+   (set (attr "enabled")
+     (if_then_else
+       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
+       (cond [(eq_attr "alternative" "0")
+		(symbol_ref "TARGET_MIX_SSE_I387")
+	      (eq_attr "alternative" "1")
+		(symbol_ref "TARGET_MIX_SSE_I387
+			     && flag_unsafe_math_optimizations")
+	   ]
+	   (symbol_ref "true"))
+       (cond [(eq_attr "alternative" "0")
+		(symbol_ref "true")
+	      (eq_attr "alternative" "1")
+		(symbol_ref "flag_unsafe_math_optimizations")
+	   ]
+	   (symbol_ref "false"))))])
 
 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
    cvtsd2ss:
@@ -4588,7 +4528,7 @@
    anyway.  */
 (define_split
   [(set (match_operand:SF 0 "sse_reg_operand")
-        (float_truncate:SF
+	(float_truncate:SF
 	  (match_operand:DF 1 "nonimmediate_operand")))]
   "TARGET_USE_VECTOR_FP_CONVERTS
    && optimize_insn_for_speed_p ()
@@ -4625,7 +4565,7 @@
 				   CONST0_RTX (DFmode)));
 })
 
-;; It's more profitable to split and then extend in the same register.
+;; It's more profitable to split and then truncate in the same register.
 (define_peephole2
   [(set (match_operand:SF 0 "sse_reg_operand")
 	(float_truncate:SF
@@ -4636,197 +4576,47 @@
    (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
   "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
 
-(define_expand "truncdfsf2_with_temp"
-  [(parallel [(set (match_operand:SF 0)
-		   (float_truncate:SF (match_operand:DF 1)))
-	      (clobber (match_operand:SF 2))])])
-
-;; SSE alternative doesn't depend on flag_unsafe_math_optimizations,
-;; because nothing we do there is unsafe.
-(define_insn "*truncdfsf_fast_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm,v")
-        (float_truncate:SF
-          (match_operand:DF 1 "nonimmediate_operand" "f  ,vm")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-    case 1:
-      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,ssecvt")
-   (set_attr "prefix" "orig,maybe_vex")
-   (set_attr "mode" "SF")
-   (set (attr "enabled")
-     (cond [(eq_attr "alternative" "0")
-              (symbol_ref "TARGET_MIX_SSE_I387
-			   && flag_unsafe_math_optimizations")
-	   ]
-           (symbol_ref "true")))])
-
-(define_insn "*truncdfsf_fast_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm")
+;; Break partial reg stall for cvtsd2ss.  This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+  [(set (match_operand:SF 0 "sse_reg_operand")
         (float_truncate:SF
-          (match_operand:DF 1 "nonimmediate_operand" "f")))]
-  "TARGET_80387 && flag_unsafe_math_optimizations"
-  "* return output_387_reg_move (insn, operands);"
-  [(set_attr "type" "fmov")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncdfsf_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,v ,?f,?v,?*r")
-	(float_truncate:SF
-	  (match_operand:DF 1 "nonimmediate_operand" "f ,vm,f ,f ,f")))
-   (clobber (match_operand:SF 2 "memory_operand"     "=X,X ,m ,m ,m"))]
-  "TARGET_MIX_SSE_I387"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-    case 1:
-      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
-
-    default:
-      return "#";
-    }
-}
-  [(set_attr "isa" "*,sse2,*,*,*")
-   (set_attr "type" "fmov,ssecvt,multi,multi,multi")
-   (set_attr "unit" "*,*,i387,i387,i387")
-   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncdfsf_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?f,?v,?*r")
-	(float_truncate:SF
-	  (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
-   (clobber (match_operand:SF 2 "memory_operand"     "=X,m ,m ,m"))]
-  "TARGET_80387"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-
-    default:
-      return "#";
-    }
-}
-  [(set_attr "type" "fmov,multi,multi,multi")
-   (set_attr "unit" "*,i387,i387,i387")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncdfsf2_i387_1"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-	(float_truncate:SF
-	  (match_operand:DF 1 "register_operand" "f")))]
-  "TARGET_80387
-   && !(TARGET_SSE2 && TARGET_SSE_MATH)
-   && !TARGET_MIX_SSE_I387"
-  "* return output_387_reg_move (insn, operands);"
-  [(set_attr "type" "fmov")
-   (set_attr "mode" "SF")])
-
-(define_split
-  [(set (match_operand:SF 0 "register_operand")
-	(float_truncate:SF
-	 (match_operand:DF 1 "fp_register_operand")))
-   (clobber (match_operand 2))]
-  "reload_completed"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));")
+	  (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && (!REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))
+   && (!EXT_REX_SSE_REG_P (operands[0])
+       || TARGET_AVX512VL)"
+  [(set (match_dup 0)
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float_truncate:SF
+	      (match_dup 1)))
+	  (match_dup 0)
+	  (const_int 1)))]
+{
+  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
+  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
 
 ;; Conversion from XFmode to {SF,DF}mode
 
-(define_expand "truncxf<mode>2"
-  [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand")
-		   (float_truncate:MODEF
-		     (match_operand:XF 1 "register_operand")))
-	      (clobber (match_dup 2))])]
-  "TARGET_80387"
-{
-  if (flag_unsafe_math_optimizations)
-    {
-      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
-      emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
-      if (reg != operands[0])
-	emit_move_insn (operands[0], reg);
-      DONE;
-    }
-  else
-    operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-})
-
-(define_insn "*truncxfsf2_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?v,?*r")
-	(float_truncate:SF
-	  (match_operand:XF 1 "register_operand"   "f ,f ,f ,f")))
-   (clobber (match_operand:SF 2 "memory_operand"   "=X,m ,m ,m"))]
-  "TARGET_80387"
-{
-  gcc_assert (!which_alternative);
-  return output_387_reg_move (insn, operands);
-}
-  [(set_attr "type" "fmov,multi,multi,multi")
-   (set_attr "unit" "*,i387,i387,i387")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncxfdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?v,?*r")
-	(float_truncate:DF
-	  (match_operand:XF 1 "register_operand"   "f ,f ,f  ,f")))
-   (clobber (match_operand:DF 2 "memory_operand"   "=X,m ,m  ,m"))]
-  "TARGET_80387"
-{
-  gcc_assert (!which_alternative);
-  return output_387_reg_move (insn, operands);
-}
-  [(set_attr "isa" "*,*,sse2,*")
-   (set_attr "type" "fmov,multi,multi,multi")
-   (set_attr "unit" "*,i387,i387,i387")
-   (set_attr "mode" "DF")])
-
-(define_insn "truncxf<mode>2_i387_noop"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
+(define_insn "truncxf<mode>2"
+  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
 	(float_truncate:MODEF
-	  (match_operand:XF 1 "register_operand" "f")))]
-  "TARGET_80387 && flag_unsafe_math_optimizations"
-  "* return output_387_reg_move (insn, operands);"
-  [(set_attr "type" "fmov")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*truncxf<mode>2_i387"
-  [(set (match_operand:MODEF 0 "memory_operand" "=m")
-	(float_truncate:MODEF
-	  (match_operand:XF 1 "register_operand" "f")))]
+	  (match_operand:XF 1 "register_operand" "f,f")))]
   "TARGET_80387"
   "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
-   (set_attr "mode" "<MODE>")])
-
-(define_split
-  [(set (match_operand:MODEF 0 "register_operand")
-	(float_truncate:MODEF
-	  (match_operand:XF 1 "register_operand")))
-   (clobber (match_operand:MODEF 2 "memory_operand"))]
-  "TARGET_80387 && reload_completed"
-  [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
-   (set (match_dup 0) (match_dup 2))])
-
-(define_split
-  [(set (match_operand:MODEF 0 "memory_operand")
-	(float_truncate:MODEF
-	  (match_operand:XF 1 "register_operand")))
-   (clobber (match_operand:MODEF 2 "memory_operand"))]
-  "TARGET_80387"
-  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))])
+   (set_attr "mode" "<MODE>")
+   (set (attr "enabled")
+     (cond [(eq_attr "alternative" "1")
+	      (symbol_ref "flag_unsafe_math_optimizations")
+	   ]
+	   (symbol_ref "true")))])
 
 ;; Signed conversion to DImode.
 
@@ -4838,7 +4628,7 @@
 {
   if (TARGET_FISTTP)
    {
-     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
 })
@@ -4852,7 +4642,7 @@
   if (TARGET_FISTTP
       && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
    {
-     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
   if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
@@ -4875,7 +4665,7 @@
 {
   if (TARGET_FISTTP)
    {
-     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
 })
@@ -4889,7 +4679,7 @@
   if (TARGET_FISTTP
       && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
    {
-     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
   if (SSE_FLOAT_MODE_P (<MODE>mode))
@@ -4913,11 +4703,23 @@
 {
   if (TARGET_FISTTP)
    {
-     emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+     emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
 })
 
+;; Unsigned conversion to DImode.
+
+(define_insn "fixuns_trunc<mode>di2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unsigned_fix:DI
+	  (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
+  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "DI")])
+
 ;; Unsigned conversion to SImode.
 
 (define_expand "fixuns_trunc<mode>si2"
@@ -4928,13 +4730,19 @@
      (use (match_dup 2))
      (clobber (match_scratch:<ssevecmode> 3))
      (clobber (match_scratch:<ssevecmode> 4))])]
-  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+  "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
 {
   machine_mode mode = <MODE>mode;
   machine_mode vecmode = <ssevecmode>mode;
   REAL_VALUE_TYPE TWO31r;
   rtx two31;
 
+  if (TARGET_AVX512F)
+    {
+      emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
+      DONE;
+    }
+
   if (optimize_insn_for_size_p ())
     FAIL;
 
@@ -4944,6 +4752,27 @@
   operands[2] = force_reg (vecmode, two31);
 })
 
+(define_insn "fixuns_trunc<mode>si2_avx512f"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unsigned_fix:SI
+	  (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F && TARGET_SSE_MATH"
+  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unsigned_fix:SI
+	    (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
+  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
+  "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*fixuns_trunc<mode>_1"
   [(set (match_operand:SI 0 "register_operand" "=&x,&x")
 	(unsigned_fix:SI
@@ -5003,37 +4832,10 @@
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
 
-(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
-  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
-	(fix:SWI248x (match_operand 1 "register_operand")))]
-  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && TARGET_FISTTP
-   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
-	 && (TARGET_64BIT || <MODE>mode != DImode))
-	&& TARGET_SSE_MATH)
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
-  else
-    {
-      operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-      emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
-							    operands[1],
-							    operands[2]));
-    }
-  DONE;
-}
-  [(set_attr "type" "fisttp")
-   (set_attr "mode" "<MODE>")])
-
 (define_insn "fix_trunc<mode>_i387_fisttp"
-  [(set (match_operand:SWI248x 0 "memory_operand" "=m")
+  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
 	(fix:SWI248x (match_operand 1 "register_operand" "f")))
-   (clobber (match_scratch:XF 2 "=&1f"))]
+   (clobber (match_scratch:XF 2 "=&f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && TARGET_FISTTP
    && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
@@ -5043,39 +4845,6 @@
   [(set_attr "type" "fisttp")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
-  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m,?r")
-	(fix:SWI248x (match_operand 1 "register_operand" "f,f")))
-   (clobber (match_operand:SWI248x 2 "memory_operand" "=X,m"))
-   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
-  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && TARGET_FISTTP
-   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
-	&& (TARGET_64BIT || <MODE>mode != DImode))
-	&& TARGET_SSE_MATH)"
-  "#"
-  [(set_attr "type" "fisttp")
-   (set_attr "mode" "<MODE>")])
-
-(define_split
-  [(set (match_operand:SWI248x 0 "register_operand")
-	(fix:SWI248x (match_operand 1 "register_operand")))
-   (clobber (match_operand:SWI248x 2 "memory_operand"))
-   (clobber (match_scratch 3))]
-  "reload_completed"
-  [(parallel [(set (match_dup 2) (fix:SWI248x (match_dup 1)))
-	      (clobber (match_dup 3))])
-   (set (match_dup 0) (match_dup 2))])
-
-(define_split
-  [(set (match_operand:SWI248x 0 "memory_operand")
-	(fix:SWI248x (match_operand 1 "register_operand")))
-   (clobber (match_operand:SWI248x 2 "memory_operand"))
-   (clobber (match_scratch 3))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (fix:SWI248x (match_dup 1)))
-	      (clobber (match_dup 3))])])
-
 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
 ;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control
 ;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG
@@ -5098,16 +4867,9 @@
 
   operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
   operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
-					 operands[2], operands[3]));
-  else
-    {
-      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-      emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
-						     operands[2], operands[3],
-						     operands[4]));
-    }
+
+  emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+				       operands[2], operands[3]));
   DONE;
 }
   [(set_attr "type" "fistp")
@@ -5115,11 +4877,11 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn "fix_truncdi_i387"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
 	(fix:DI (match_operand 1 "register_operand" "f")))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))
-   (clobber (match_scratch:XF 4 "=&1f"))]
+   (clobber (match_scratch:XF 4 "=&f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && !TARGET_FISTTP
    && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
@@ -5128,50 +4890,8 @@
    (set_attr "i387_cw" "trunc")
    (set_attr "mode" "DI")])
 
-(define_insn "fix_truncdi_i387_with_temp"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
-	(fix:DI (match_operand 1 "register_operand" "f,f")))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
-   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
-  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !TARGET_FISTTP
-   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "DI")])
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(fix:DI (match_operand 1 "register_operand")))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:DI 4 "memory_operand"))
-   (clobber (match_scratch 5))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
-	      (use (match_dup 2))
-	      (use (match_dup 3))
-	      (clobber (match_dup 5))])
-   (set (match_dup 0) (match_dup 4))])
-
-(define_split
-  [(set (match_operand:DI 0 "memory_operand")
-	(fix:DI (match_operand 1 "register_operand")))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:DI 4 "memory_operand"))
-   (clobber (match_scratch 5))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
-	      (use (match_dup 2))
-	      (use (match_dup 3))
-	      (clobber (match_dup 5))])])
-
 (define_insn "fix_trunc<mode>_i387"
-  [(set (match_operand:SWI24 0 "memory_operand" "=m")
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
 	(fix:SWI24 (match_operand 1 "register_operand" "f")))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))]
@@ -5183,46 +4903,9 @@
    (set_attr "i387_cw" "trunc")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fix_trunc<mode>_i387_with_temp"
-  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r")
-	(fix:SWI24 (match_operand 1 "register_operand" "f,f")))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))]
-  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !TARGET_FISTTP
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "<MODE>")])
-
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand")
-	(fix:SWI24 (match_operand 1 "register_operand")))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:SWI24 4 "memory_operand"))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4) (fix:SWI24 (match_dup 1)))
-	      (use (match_dup 2))
-	      (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))])
-
-(define_split
-  [(set (match_operand:SWI24 0 "memory_operand")
-	(fix:SWI24 (match_operand 1 "register_operand")))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:SWI24 4 "memory_operand"))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (fix:SWI24 (match_dup 1)))
-	      (use (match_dup 2))
-	      (use (match_dup 3))])])
-
 (define_insn "x86_fnstcw_1"
   [(set (match_operand:HI 0 "memory_operand" "=m")
-	(unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+	(unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
   "TARGET_80387"
   "fnstcw\t%0"
   [(set (attr "length")
@@ -5230,19 +4913,6 @@
    (set_attr "mode" "HI")
    (set_attr "unit" "i387")
    (set_attr "bdver1_decode" "vector")])
-
-(define_insn "x86_fldcw_1"
-  [(set (reg:HI FPCR_REG)
-	(unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
-  "TARGET_80387"
-  "fldcw\t%0"
-  [(set (attr "length")
-	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
-   (set_attr "mode" "HI")
-   (set_attr "unit" "i387")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")
-   (set_attr "bdver1_decode" "vector")])
 
 ;; Conversion between fixed point and floating point.
 
@@ -5272,36 +4942,19 @@
    (set_attr "znver1_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "float<SWI48:mode><MODEF:mode>2"
+(define_expand "float<SWI48x:mode><MODEF:mode>2"
   [(set (match_operand:MODEF 0 "register_operand")
-	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
-  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
-{
-  if (!(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
-      && !X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
-    {
-      rtx reg = gen_reg_rtx (XFmode);
-      rtx (*insn)(rtx, rtx);
-
-      emit_insn (gen_float<SWI48:mode>xf2 (reg, operands[1]));
-
-      if (<MODEF:MODE>mode == SFmode)
-	insn = gen_truncxfsf2;
-      else if (<MODEF:MODE>mode == DFmode)
-	insn = gen_truncxfdf2;
-      else
-	gcc_unreachable ();
-
-      emit_insn (insn (operands[0], reg));
-      DONE;
-    }
-})
-
-(define_insn "*float<SWI48:mode><MODEF:mode>2_mixed"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,Yc,v")
+	(float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
+  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
+   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+       && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
+
+(define_insn "*float<SWI48:mode><MODEF:mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
 	(float:MODEF
 	  (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
-  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
+   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
   "@
    fild%Z1\t%1
    %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
@@ -5322,17 +4975,28 @@
    (set_attr "znver1_decode" "double,*,*")
    (set_attr "fp_int_src" "true")
    (set (attr "enabled")
-     (cond [(eq_attr "alternative" "0")
-              (symbol_ref "TARGET_MIX_SSE_I387
-                           && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
-                                                <SWI48:MODE>mode)")
-           ]
-           (symbol_ref "true")))])
-
-(define_insn "*float<SWI48x:mode><MODEF:mode>2_i387"
+     (if_then_else
+       (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
+       (if_then_else
+	 (eq_attr "alternative" "0")
+	 (symbol_ref "TARGET_MIX_SSE_I387
+		      && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
+					   <SWI48:MODE>mode)")
+	 (symbol_ref "true"))
+       (if_then_else
+	 (eq_attr "alternative" "0")
+	 (symbol_ref "true")
+	 (symbol_ref "false"))))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+	      (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
+	   (symbol_ref "true")))])
+
+(define_insn "*floatdi<MODEF:mode>2_i387"
   [(set (match_operand:MODEF 0 "register_operand" "=f")
-	(float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)"
+	(float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
+  "!TARGET_64BIT
+   && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
   "fild%Z1\t%1"
   [(set_attr "type" "fmov")
    (set_attr "mode" "<MODEF:MODE>")
@@ -5367,6 +5031,56 @@
   DONE;
 })
 
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing DImode value through XMM registers.
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand")
+	(float:X87MODEF
+	  (match_operand:DI 1 "register_operand")))]
+  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
+   && can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  emit_insn (gen_floatdi<mode>2_i387_with_xmm
+	     (operands[0], operands[1],
+	      assign_386_stack_local (DImode, SLOT_TEMP)));
+  DONE;
+})
+
+(define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(float:X87MODEF
+	  (match_operand:DI 1 "register_operand" "r")))
+   (clobber (match_scratch:V4SI 3 "=x"))
+   (clobber (match_scratch:V4SI 4 "=x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=m"))]
+  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+     Assemble the 64-bit DImode value in an xmm register.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+			      gen_lowpart (SImode, operands[1])));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+			      gen_highpart (SImode, operands[1])));
+  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+					 operands[4]));
+
+  operands[3] = gen_lowpart (DImode, operands[3]);
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
 ;; Avoid partial SSE register dependency stalls.  This splitter should split
 ;; late in the pass sequence (after register rename pass), so allocated
 ;; registers won't change anymore
@@ -5392,114 +5106,6 @@
   emit_move_insn (operands[0], CONST0_RTX (vmode));
 })
 
-;; Break partial reg stall for cvtsd2ss.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
-  [(set (match_operand:SF 0 "sse_reg_operand")
-        (float_truncate:SF
-	  (match_operand:DF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
-   && optimize_function_for_speed_p (cfun)
-   && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
-   && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL)"
-  [(set (match_dup 0)
-	(vec_merge:V4SF
-	  (vec_duplicate:V4SF
-	    (float_truncate:SF
-	      (match_dup 1)))
-	  (match_dup 0)
-	  (const_int 1)))]
-{
-  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
-  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
-})
-
-;; Break partial reg stall for cvtss2sd.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
-  [(set (match_operand:DF 0 "sse_reg_operand")
-        (float_extend:DF
-          (match_operand:SF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
-   && optimize_function_for_speed_p (cfun)
-   && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
-   && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL)"
-  [(set (match_dup 0)
-        (vec_merge:V2DF
-	  (vec_duplicate:V2DF
-	    (float_extend:DF
-	      (match_dup 1)))
-	  (match_dup 0)
-          (const_int 1)))]
-{
-  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
-  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
-})
-
-;; Avoid store forwarding (partial memory) stall penalty
-;; by passing DImode value through XMM registers.  */
-
-(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
-  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
-	(float:X87MODEF
-	  (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
-   (clobber (match_scratch:V4SI 3 "=X,x"))
-   (clobber (match_scratch:V4SI 4 "=X,x"))
-   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
-   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
-  "#"
-  [(set_attr "type" "multi")
-   (set_attr "mode" "<X87MODEF:MODE>")
-   (set_attr "unit" "i387")
-   (set_attr "fp_int_src" "true")])
-
-(define_split
-  [(set (match_operand:X87MODEF 0 "fp_register_operand")
-	(float:X87MODEF (match_operand:DI 1 "register_operand")))
-   (clobber (match_scratch:V4SI 3))
-   (clobber (match_scratch:V4SI 4))
-   (clobber (match_operand:DI 2 "memory_operand"))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
-   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
-   && reload_completed"
-  [(set (match_dup 2) (match_dup 3))
-   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
-{
-  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
-     Assemble the 64-bit DImode value in an xmm register.  */
-  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
-			      gen_lowpart (SImode, operands[1])));
-  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
-			      gen_highpart (SImode, operands[1])));
-  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
-					 operands[4]));
-
-  operands[3] = gen_lowpart (DImode, operands[3]);
-})
-
-(define_split
-  [(set (match_operand:X87MODEF 0 "fp_register_operand")
-	(float:X87MODEF (match_operand:DI 1 "memory_operand")))
-   (clobber (match_scratch:V4SI 3))
-   (clobber (match_scratch:V4SI 4))
-   (clobber (match_operand:DI 2 "memory_operand"))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
-   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
-   && reload_completed"
-  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
-
 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
   [(set (match_operand:MODEF 0 "register_operand")
 	(unsigned_float:MODEF
@@ -5512,16 +5118,26 @@
   DONE;
 })
 
+(define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
+	(unsigned_float:MODEF
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_AVX512F && TARGET_SSE_MATH"
+  "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODEF:MODE>")])
+
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two
 ;; SImode values to stack. Also note that fild loads from memory only.
 
-(define_insn_and_split "*floatunssi<mode>2_i387_with_xmm"
+(define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
   [(set (match_operand:X87MODEF 0 "register_operand" "=f")
 	(unsigned_float:X87MODEF
 	  (match_operand:SI 1 "nonimmediate_operand" "rm")))
-   (clobber (match_scratch:DI 3 "=x"))
-   (clobber (match_operand:DI 2 "memory_operand" "=m"))]
+   (clobber (match_operand:DI 2 "memory_operand" "=m"))
+   (clobber (match_scratch:DI 3 "=x"))]
   "!TARGET_64BIT
    && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
@@ -5536,43 +5152,59 @@
    (set_attr "mode" "<MODE>")])
 
 (define_expand "floatunssi<mode>2"
-  [(parallel
-     [(set (match_operand:X87MODEF 0 "register_operand")
-	   (unsigned_float:X87MODEF
-	     (match_operand:SI 1 "nonimmediate_operand")))
-      (clobber (match_scratch:DI 3))
-      (clobber (match_dup 2))])]
-  "!TARGET_64BIT
-   && ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-	&& TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
-       || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
-{
-  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+  [(set (match_operand:X87MODEF 0 "register_operand")
+	(unsigned_float:X87MODEF
+	  (match_operand:SI 1 "nonimmediate_operand")))]
+  "(!TARGET_64BIT
+    && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
+   || ((!TARGET_64BIT || TARGET_AVX512F)
+       && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+    {
+      emit_insn (gen_floatunssi<mode>2_i387_with_xmm
+		  (operands[0], operands[1],
+		   assign_386_stack_local (DImode, SLOT_TEMP)));
+      DONE;
+    }
+  if (!TARGET_AVX512F)
     {
       ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
       DONE;
     }
-  else
-    operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
 })
 
 (define_expand "floatunsdisf2"
-  [(use (match_operand:SF 0 "register_operand"))
-   (use (match_operand:DI 1 "nonimmediate_operand"))]
+  [(set (match_operand:SF 0 "register_operand")
+	(unsigned_float:SF
+	  (match_operand:DI 1 "nonimmediate_operand")))]
   "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
-  "x86_emit_floatuns (operands); DONE;")
+{
+  if (!TARGET_AVX512F)
+    {
+      x86_emit_floatuns (operands);
+      DONE;
+    }
+})
 
 (define_expand "floatunsdidf2"
-  [(use (match_operand:DF 0 "register_operand"))
-   (use (match_operand:DI 1 "nonimmediate_operand"))]
-  "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+  [(set (match_operand:DF 0 "register_operand")
+	(unsigned_float:DF
+	  (match_operand:DI 1 "nonimmediate_operand")))]
+  "(TARGET_KEEPS_VECTOR_ALIGNED_STACK || TARGET_AVX512F)
    && TARGET_SSE2 && TARGET_SSE_MATH"
 {
-  if (TARGET_64BIT)
-    x86_emit_floatuns (operands);
-  else
-    ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
-  DONE;
+  if (!TARGET_64BIT)
+    {
+      ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+      DONE;
+    }
+  if (!TARGET_AVX512F)
+    {
+      x86_emit_floatuns (operands);
+      DONE;
+    }
 })
 
 ;; Load effective address instructions
@@ -6823,6 +6455,20 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*add<mode>3_carry_0"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(plus:SWI
+	  (match_operator:SWI 3 "ix86_carry_flag_operator"
+	    [(match_operand 2 "flags_reg_operand") (const_int 0)])
+	  (match_operand:SWI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)"
+  "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*addsi3_carry_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
@@ -6839,6 +6485,20 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
+(define_insn "*addsi3_carry_zext_0"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
+		    [(reg FLAGS_REG) (const_int 0)])
+		   (match_operand:SI 1 "register_operand" "0"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "adc{l}\t{$0, %k0|%k0, 0}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
 ;; There is no point to generate ADCX instruction. ADC is shorter and faster.
 
 (define_insn "addcarry<mode>"
@@ -6895,6 +6555,20 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*sub<mode>3_carry_0"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(minus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "0")
+	  (match_operator:SWI 3 "ix86_carry_flag_operator"
+	    [(match_operand 2 "flags_reg_operand") (const_int 0)])))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)"
+  "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*subsi3_carry_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
@@ -6912,6 +6586,21 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
+(define_insn "*subsi3_carry_zext_0"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (match_operand:SI 1 "register_operand" "0")
+	    (match_operator:SI 2 "ix86_carry_flag_operator"
+	      [(reg FLAGS_REG) (const_int 0)]))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "sbb{l}\t{$0, %k0|%k0, 0}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
 (define_insn "sub<mode>3_carry_ccc"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
@@ -7577,16 +7266,16 @@
    (set_attr "mode" "QI")])
 
 (define_expand "<s>mul<mode>3_highpart"
-  [(parallel [(set (match_operand:SWI48 0 "register_operand")
-		   (truncate:SWI48
+  [(parallel [(set (match_operand:DWIH 0 "register_operand")
+		   (truncate:DWIH
 		     (lshiftrt:<DWI>
 		       (mult:<DWI>
 			 (any_extend:<DWI>
-			   (match_operand:SWI48 1 "nonimmediate_operand"))
+			   (match_operand:DWIH 1 "nonimmediate_operand"))
 			 (any_extend:<DWI>
-			   (match_operand:SWI48 2 "register_operand")))
+			   (match_operand:DWIH 2 "register_operand")))
 		       (match_dup 3))))
-	      (clobber (match_scratch:SWI48 4))
+	      (clobber (match_scratch:DWIH 4))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
   "operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
@@ -9225,14 +8914,14 @@
 })
 
 (define_insn "*andndi3_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=r,&r")
+  [(set (match_operand:DI 0 "register_operand" "=&r,r,r,&r")
 	(and:DI
-	  (not:DI (match_operand:DI 1 "register_operand" "r,0"))
-	  (match_operand:DI 2 "nonimmediate_operand" "rm,rm")))
+	  (not:DI (match_operand:DI 1 "register_operand" "r,0,r,0"))
+	  (match_operand:DI 2 "nonimmediate_operand" "rm,rm,0,rm")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
   "#"
-  [(set_attr "isa" "bmi,*")])
+  [(set_attr "isa" "bmi,bmi,bmi,*")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
@@ -9983,36 +9672,9 @@
   "TARGET_80387
    && (reload_completed
        || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
-  "f<absneg_mnemonic>"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*<code>extendsfdf2"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-	(absneg:DF (float_extend:DF
-		     (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "f<absneg_mnemonic>"
+  "<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*<code>extendsfxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(absneg:XF (float_extend:XF
-		     (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_80387"
-  "f<absneg_mnemonic>"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
-
-(define_insn "*<code>extenddfxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(absneg:XF (float_extend:XF
-		     (match_operand:DF 1 "register_operand" "0"))))]
-  "TARGET_80387"
-  "f<absneg_mnemonic>"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; Copysign instructions
 
@@ -10030,7 +9692,7 @@
 (define_insn_and_split "copysign<mode>3_const"
   [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv")
 	(unspec:CSGNMODE
-	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "YvmC")
+	  [(match_operand:<CSGNVMODE> 1 "nonimm_or_0_operand" "YvmC")
 	   (match_operand:CSGNMODE 2 "register_operand" "0")
 	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "Yvm")]
 	  UNSPEC_COPYSIGN))]
@@ -10209,6 +9871,87 @@
   ""
   "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
 
+(define_insn_and_split "*ashl<dwi>3_doubleword_mask"
+  [(set (match_operand:<DWI> 0 "register_operand")
+	(ashift:<DWI>
+	  (match_operand:<DWI> 1 "register_operand")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" "c")
+	      (match_operand:SI 3 "const_int_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 6)
+	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
+		     (lshiftrt:DWIH (match_dup 5)
+		       (minus:QI (match_dup 8) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 4)
+	   (ashift:DWIH (match_dup 5) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
+
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+
+  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+    {
+      rtx tem = gen_reg_rtx (SImode);
+      emit_insn (gen_andsi3 (tem, operands[2], operands[3]));
+      operands[2] = tem;
+    }
+
+  operands[2] = gen_lowpart (QImode, operands[2]);
+
+  if (!rtx_equal_p (operands[6], operands[7]))
+    emit_move_insn (operands[6], operands[7]);
+})
+
+(define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
+  [(set (match_operand:<DWI> 0 "register_operand")
+	(ashift:<DWI>
+	  (match_operand:<DWI> 1 "register_operand")
+	  (and:QI
+	    (match_operand:QI 2 "register_operand" "c")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 6)
+	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
+		     (lshiftrt:DWIH (match_dup 5)
+		       (minus:QI (match_dup 8) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 4)
+	   (ashift:DWIH (match_dup 5) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
+
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+
+  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+    {
+      rtx tem = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
+      operands[2] = tem;
+    }
+
+  if (!rtx_equal_p (operands[6], operands[7]))
+    emit_move_insn (operands[6], operands[7]);
+})
+
 (define_insn "*ashl<mode>3_doubleword"
   [(set (match_operand:DWI 0 "register_operand" "=&r")
 	(ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
@@ -10328,7 +10071,7 @@
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (subreg:QI
 	    (and:SI
-	      (match_operand:SI 2 "register_operand")
+	      (match_operand:SI 2 "register_operand" "c,r")
 	      (match_operand:SI 3 "const_int_operand")) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
@@ -10342,14 +10085,15 @@
 	   (ashift:SWI48 (match_dup 1)
 			 (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = gen_lowpart (QImode, operands[2]);")
+  "operands[2] = gen_lowpart (QImode, operands[2]);"
+  [(set_attr "isa" "*,bmi2")])
 
 (define_insn_and_split "*ashl<mode>3_mask_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand")
 	(ashift:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (and:QI
-	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 2 "register_operand" "c,r")
 	    (match_operand:QI 3 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
@@ -10362,7 +10106,9 @@
      [(set (match_dup 0)
 	   (ashift:SWI48 (match_dup 1)
 			 (match_dup 2)))
-      (clobber (reg:CC FLAGS_REG))])])
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "isa" "*,bmi2")])
 
 (define_insn "*bmi2_ashl<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
@@ -10622,7 +10368,7 @@
 {
   switch (get_attr_type (insn))
     {
-    case TYPE_ALU:
+    case TYPE_ALU1:
       gcc_assert (operands[1] == const1_rtx);
       return "add{b}\t%0, %0";
 
@@ -10638,12 +10384,12 @@
      (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
 		      (match_operand 0 "register_operand"))
 		 (match_operand 1 "const1_operand"))
-	      (const_string "alu")
+	      (const_string "alu1")
 	   ]
 	   (const_string "ishift1")))
    (set (attr "length_immediate")
      (if_then_else
-       (ior (eq_attr "type" "alu")
+       (ior (eq_attr "type" "alu1")
 	    (and (eq_attr "type" "ishift1")
 		 (and (match_operand 1 "const1_operand")
 		      (ior (match_test "TARGET_SHIFT1")
@@ -10848,7 +10594,7 @@
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (subreg:QI
 	    (and:SI
-	      (match_operand:SI 2 "register_operand")
+	      (match_operand:SI 2 "register_operand" "c,r")
 	      (match_operand:SI 3 "const_int_operand")) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
@@ -10862,14 +10608,15 @@
 	   (any_shiftrt:SWI48 (match_dup 1)
 			      (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = gen_lowpart (QImode, operands[2]);")
+  "operands[2] = gen_lowpart (QImode, operands[2]);"
+  [(set_attr "isa" "*,bmi2")])
 
 (define_insn_and_split "*<shift_insn><mode>3_mask_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand")
 	(any_shiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (and:QI
-	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 2 "register_operand" "c,r")
 	    (match_operand:QI 3 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
@@ -10882,7 +10629,90 @@
      [(set (match_dup 0)
 	   (any_shiftrt:SWI48 (match_dup 1)
 			      (match_dup 2)))
-      (clobber (reg:CC FLAGS_REG))])])
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "isa" "*,bmi2")])
+
+(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask"
+  [(set (match_operand:<DWI> 0 "register_operand")
+	(any_shiftrt:<DWI>
+	  (match_operand:<DWI> 1 "register_operand")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" "c")
+	      (match_operand:SI 3 "const_int_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 4)
+	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
+		     (ashift:DWIH (match_dup 7)
+		       (minus:QI (match_dup 8) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 6)
+	   (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
+
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+
+  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+    {
+      rtx tem = gen_reg_rtx (SImode);
+      emit_insn (gen_andsi3 (tem, operands[2], operands[3]));
+      operands[2] = tem;
+    }
+
+  operands[2] = gen_lowpart (QImode, operands[2]);
+
+  if (!rtx_equal_p (operands[4], operands[5]))
+    emit_move_insn (operands[4], operands[5]);
+})
+
+(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask_1"
+  [(set (match_operand:<DWI> 0 "register_operand")
+	(any_shiftrt:<DWI>
+	  (match_operand:<DWI> 1 "register_operand")
+	  (and:QI
+	    (match_operand:QI 2 "register_operand" "c")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 4)
+	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
+		     (ashift:DWIH (match_dup 7)
+		       (minus:QI (match_dup 8) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 6)
+	   (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
+
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+
+  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
+    {
+      rtx tem = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
+      operands[2] = tem;
+    }
+
+  if (!rtx_equal_p (operands[4], operands[5]))
+    emit_move_insn (operands[4], operands[5]);
+})
 
 (define_insn_and_split "*<shift_insn><mode>3_doubleword"
   [(set (match_operand:DWI 0 "register_operand" "=&r")
@@ -11327,7 +11157,7 @@
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (subreg:QI
 	    (and:SI
-	      (match_operand:SI 2 "register_operand")
+	      (match_operand:SI 2 "register_operand" "c")
 	      (match_operand:SI 3 "const_int_operand")) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
@@ -11348,7 +11178,7 @@
 	(any_rotate:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand")
 	  (and:QI
-	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 2 "register_operand" "c")
 	    (match_operand:QI 3 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
@@ -11612,7 +11442,7 @@
    (set_attr "mode" "QI")])
 
 (define_split
- [(set (match_operand:HI 0 "register_operand")
+ [(set (match_operand:HI 0 "QIreg_operand")
        (any_rotate:HI (match_dup 0) (const_int 8)))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
@@ -12204,8 +12034,7 @@
 	       (lt (minus (match_dup 0) (pc))
 		   (const_int 128)))
 	  (const_int 2)
-	  (const_int 6)))
-   (set_attr "maybe_prefix_bnd" "1")])
+	  (const_int 6)))])
 
 ;; In general it is not safe to assume too much about CCmode registers,
 ;; so simplify-rtx stops when it sees a second one.  Under certain
@@ -12273,24 +12102,27 @@
 	       (lt (minus (match_dup 0) (pc))
 		   (const_int 128)))
 	  (const_int 2)
-	  (const_int 5)))
-   (set_attr "maybe_prefix_bnd" "1")])
+	  (const_int 5)))])
 
 (define_expand "indirect_jump"
   [(set (pc) (match_operand 0 "indirect_branch_operand"))]
   ""
 {
-  if (TARGET_X32)
+  if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
     operands[0] = convert_memory_address (word_mode, operands[0]);
+  cfun->machine->has_local_indirect_jump = true;
 })
 
 (define_insn "*indirect_jump"
   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
   ""
-  "%!jmp\t%A0"
-  [(set_attr "type" "ibr")
-   (set_attr "length_immediate" "0")
-   (set_attr "maybe_prefix_bnd" "1")])
+  "* return ix86_output_indirect_jmp (operands[0]);"
+  [(set (attr "type")
+     (if_then_else (match_test "(cfun->machine->indirect_branch_type
+				 != indirect_branch_keep)")
+	(const_string "multi")
+	(const_string "ibr")))
+   (set_attr "length_immediate" "0")])
 
 (define_expand "tablejump"
   [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
@@ -12329,18 +12161,22 @@
 					 OPTAB_DIRECT);
     }
 
-  if (TARGET_X32)
+  if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
     operands[0] = convert_memory_address (word_mode, operands[0]);
+  cfun->machine->has_local_indirect_jump = true;
 })
 
 (define_insn "*tablejump_1"
   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
    (use (label_ref (match_operand 1)))]
   ""
-  "%!jmp\t%A0"
-  [(set_attr "type" "ibr")
-   (set_attr "length_immediate" "0")
-   (set_attr "maybe_prefix_bnd" "1")])
+  "* return ix86_output_indirect_jmp (operands[0]);"
+  [(set (attr "type")
+     (if_then_else (match_test "(cfun->machine->indirect_branch_type
+				 != indirect_branch_keep)")
+	(const_string "multi")
+	(const_string "ibr")))
+   (set_attr "length_immediate" "0")])
 
 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
 
@@ -12375,6 +12211,7 @@
   "(peep2_reg_dead_p (3, operands[1])
     || operands_match_p (operands[1], operands[3]))
    && ! reg_overlap_mentioned_p (operands[3], operands[0])
+   && ! reg_overlap_mentioned_p (operands[3], operands[4])
    && ! reg_set_p (operands[3], operands[4])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 5) (match_dup 0))
@@ -12400,6 +12237,7 @@
     || operands_match_p (operands[2], operands[4]))
    && ! reg_overlap_mentioned_p (operands[4], operands[0])
    && ! reg_overlap_mentioned_p (operands[4], operands[1])
+   && ! reg_overlap_mentioned_p (operands[4], operands[5])
    && ! reg_set_p (operands[4], operands[5])
    && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
    && peep2_regno_dead_p (0, FLAGS_REG)"
@@ -12449,6 +12287,7 @@
   "(peep2_reg_dead_p (3, operands[1])
     || operands_match_p (operands[1], operands[3]))
    && ! reg_overlap_mentioned_p (operands[3], operands[0])
+   && ! reg_overlap_mentioned_p (operands[3], operands[4])
    && ! reg_set_p (operands[3], operands[4])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 5) (match_dup 0))
@@ -12475,6 +12314,7 @@
     || operands_match_p (operands[2], operands[4]))
    && ! reg_overlap_mentioned_p (operands[4], operands[0])
    && ! reg_overlap_mentioned_p (operands[4], operands[1])
+   && ! reg_overlap_mentioned_p (operands[4], operands[5])
    && ! reg_set_p (operands[4], operands[5])
    && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
    && peep2_regno_dead_p (0, FLAGS_REG)"
@@ -12554,7 +12394,10 @@
 		     (match_operand:SI 0 "register_no_elim_operand" "U")
 		     (match_operand:SI 1 "GOT32_symbol_operand"))))
 	 (match_operand 2))]
-  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "!TARGET_MACHO
+  && !TARGET_64BIT
+  && !TARGET_INDIRECT_BRANCH_REGISTER
+  && SIBLING_CALL_P (insn)"
 {
   rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
   fnaddr = gen_const_mem (SImode, fnaddr);
@@ -12573,7 +12416,7 @@
   [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
 	 (match_operand 1))
    (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
-  "!TARGET_X32"
+  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
@@ -12582,7 +12425,9 @@
 	(match_operand:W 1 "memory_operand"))
    (call (mem:QI (match_dup 0))
 	 (match_operand 3))]
-  "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (1))
+  "!TARGET_X32
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && SIBLING_CALL_P (peep2_next_insn (1))
    && !reg_mentioned_p (operands[0],
 			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
   [(parallel [(call (mem:QI (match_dup 1))
@@ -12595,7 +12440,9 @@
    (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
    (call (mem:QI (match_dup 0))
 	 (match_operand 3))]
-  "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (2))
+  "!TARGET_X32
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && SIBLING_CALL_P (peep2_next_insn (2))
    && !reg_mentioned_p (operands[0],
 			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
   [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
@@ -12617,7 +12464,7 @@
 })
 
 (define_insn "*call_pop"
-  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lmBz"))
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
 	 (match_operand 1))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
@@ -12637,7 +12484,7 @@
   [(set_attr "type" "call")])
 
 (define_insn "*sibcall_pop_memory"
-  [(call (mem:QI (match_operand:SI 0 "memory_operand" "m"))
+  [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
 	 (match_operand 1))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
@@ -12691,7 +12538,9 @@
   [(set (match_operand:W 0 "register_operand")
         (match_operand:W 1 "memory_operand"))
    (set (pc) (match_dup 0))]
-  "!TARGET_X32 && peep2_reg_dead_p (2, operands[0])"
+  "!TARGET_X32
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && peep2_reg_dead_p (2, operands[0])"
   [(set (pc) (match_dup 1))])
 
 ;; Call subroutine, returning value in operand 0
@@ -12751,7 +12600,10 @@
 			  (match_operand:SI 1 "register_no_elim_operand" "U")
 			  (match_operand:SI 2 "GOT32_symbol_operand"))))
 	 (match_operand 3)))]
-  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "!TARGET_MACHO
+   && !TARGET_64BIT
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && SIBLING_CALL_P (insn)"
 {
   rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
   fnaddr = gen_const_mem (SImode, fnaddr);
@@ -12772,7 +12624,7 @@
  	(call (mem:QI (match_operand:W 1 "memory_operand" "m"))
 	      (match_operand 2)))
    (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
-  "!TARGET_X32"
+  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
@@ -12782,7 +12634,9 @@
    (set (match_operand 2)
    (call (mem:QI (match_dup 0))
 		 (match_operand 3)))]
-  "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (1))
+  "!TARGET_X32
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && SIBLING_CALL_P (peep2_next_insn (1))
    && !reg_mentioned_p (operands[0],
 			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
   [(parallel [(set (match_dup 2)
@@ -12797,7 +12651,9 @@
    (set (match_operand 2)
 	(call (mem:QI (match_dup 0))
 	      (match_operand 3)))]
-  "!TARGET_X32 && SIBLING_CALL_P (peep2_next_insn (2))
+  "!TARGET_X32
+   && !TARGET_INDIRECT_BRANCH_REGISTER
+   && SIBLING_CALL_P (peep2_next_insn (2))
    && !reg_mentioned_p (operands[0],
 			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
   [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
@@ -12822,7 +12678,7 @@
 
 (define_insn "*call_value_pop"
   [(set (match_operand 0)
-	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lmBz"))
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
 	      (match_operand 2)))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
@@ -13032,12 +12888,11 @@
 (define_insn "simple_return_internal"
   [(simple_return)]
   "reload_completed"
-  "%!ret"
+  "* return ix86_output_function_return (false);"
   [(set_attr "length" "1")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
-   (set_attr "modrm" "0")
-   (set_attr "maybe_prefix_bnd" "1")])
+   (set_attr "modrm" "0")])
 
 (define_insn "interrupt_return"
   [(simple_return)
@@ -13054,37 +12909,42 @@
   [(simple_return)
    (unspec [(const_int 0)] UNSPEC_REP)]
   "reload_completed"
-{
-  if (ix86_bnd_prefixed_insn_p (insn))
-    return "%!ret";
-
-  return "rep%; ret";
-}
+  "* return ix86_output_function_return (true);"
   [(set_attr "length" "2")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
    (set_attr "prefix_rep" "1")
    (set_attr "modrm" "0")])
 
-(define_insn "simple_return_pop_internal"
+(define_insn_and_split "simple_return_pop_internal"
   [(simple_return)
    (use (match_operand:SI 0 "const_int_operand"))]
   "reload_completed"
   "%!ret\t%0"
+  "&& cfun->machine->function_return_type != indirect_branch_keep"
+  [(const_int 0)]
+  "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
   [(set_attr "length" "3")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "2")
-   (set_attr "modrm" "0")
-   (set_attr "maybe_prefix_bnd" "1")])
-
-(define_insn "simple_return_indirect_internal"
+   (set_attr "modrm" "0")])
+
+(define_expand "simple_return_indirect_internal"
+  [(parallel
+     [(simple_return)
+      (use (match_operand 0 "register_operand"))])])
+
+(define_insn "*simple_return_indirect_internal<mode>"
   [(simple_return)
-   (use (match_operand:SI 0 "register_operand" "r"))]
+   (use (match_operand:W 0 "register_operand" "r"))]
   "reload_completed"
-  "%!jmp\t%A0"
-  [(set_attr "type" "ibr")
-   (set_attr "length_immediate" "0")
-   (set_attr "maybe_prefix_bnd" "1")])
+  "* return ix86_output_indirect_function_return (operands[0]);"
+  [(set (attr "type")
+     (if_then_else (match_test "(cfun->machine->indirect_branch_type
+				 != indirect_branch_keep)")
+	(const_string "multi")
+	(const_string "ibr")))
+   (set_attr "length_immediate" "0")])
 
 (define_insn "nop"
   [(const_int 0)]
@@ -13187,7 +13047,6 @@
   "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
   [(set_attr "type" "lea")
    (set_attr "length_address" "4")
-   (set_attr "modrm_class" "unknown")
    (set_attr "mode" "DI")])
 
 (define_insn "set_rip_rex64"
@@ -13232,7 +13091,10 @@
      stack address we wish to restore.  */
   tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
   tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
-  tmp = gen_rtx_MEM (Pmode, tmp);
+  /* Return address is always in word_mode.  */
+  tmp = gen_rtx_MEM (word_mode, tmp);
+  if (GET_MODE (ra) != word_mode)
+    ra = convert_to_mode (word_mode, ra, 1);
   emit_move_insn (tmp, ra);
 
   emit_jump_insn (gen_eh_return_internal ());
@@ -13724,6 +13586,43 @@
    (set_attr "btver2_decode" "double")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi_blsr_<mode>_cmp"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:SWI48
+	    (plus:SWI48
+	      (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	      (const_int -1))
+	    (match_dup 1))
+	  (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+	(and:SWI48
+	  (plus:SWI48
+	    (match_dup 1)
+	    (const_int -1))
+	  (match_dup 1)))]
+   "TARGET_BMI"
+   "blsr\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "btver2_decode" "double")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsr_<mode>_ccz"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:SWI48
+	    (plus:SWI48
+	      (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	      (const_int -1))
+	    (match_dup 1))
+	  (const_int 0)))
+   (clobber (match_scratch:SWI48 0 "=r"))]
+   "TARGET_BMI"
+   "blsr\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "btver2_decode" "double")
+   (set_attr "mode" "<MODE>")])
+
 ;; BMI2 instructions.
 (define_expand "bmi2_bzhi_<mode>3"
   [(parallel
@@ -14067,8 +13966,8 @@
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     bswap\t%0
-    movbe\t{%1, %0|%0, %1}
-    movbe\t{%1, %0|%0, %1}"
+    movbe{<imodesuffix>}\t{%1, %0|%0, %1}
+    movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "bitmanip,imov,imov")
    (set_attr "modrm" "0,1,1")
    (set_attr "prefix_0f" "*,1,1")
@@ -14084,26 +13983,58 @@
    (set_attr "modrm" "0")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*bswaphi_lowpart_1"
+(define_expand "bswaphi2"
+  [(set (match_operand:HI 0 "register_operand")
+	(bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
+  "TARGET_MOVBE")
+
+(define_insn "*bswaphi2_movbe"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
+	(bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
+  "TARGET_MOVBE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+    xchg{b}\t{%h0, %b0|%b0, %h0}
+    movbe{w}\t{%1, %0|%0, %1}
+    movbe{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "*,1,1")
+   (set_attr "prefix_0f" "*,1,1")
+   (set_attr "prefix_extra" "*,1,1")
+   (set_attr "pent_pair" "np,*,*")
+   (set_attr "athlon_decode" "vector,*,*")
+   (set_attr "amdfam10_decode" "double,*,*")
+   (set_attr "bdver1_decode" "double,*,*")
+   (set_attr "mode" "QI,HI,HI")])
+
+(define_peephole2
+  [(set (match_operand:HI 0 "general_reg_operand")
+	(bswap:HI (match_dup 0)))]
+  "TARGET_MOVBE
+   && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "bswaphi_lowpart"
   [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
 	(bswap:HI (match_dup 0)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)"
+  ""
   "@
     xchg{b}\t{%h0, %b0|%b0, %h0}
     rol{w}\t{$8, %0|%0, 8}"
-  [(set_attr "length" "2,4")
+  [(set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "true")]
+	   (symbol_ref "false")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "TARGET_USE_XCHGB")]
+	   (symbol_ref "!TARGET_USE_XCHGB")))
+   (set_attr "length" "2,4")
    (set_attr "mode" "QI,HI")])
 
-(define_insn "bswaphi_lowpart"
-  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
-	(bswap:HI (match_dup 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "rol{w}\t{$8, %0|%0, 8}"
-  [(set_attr "length" "4")
-   (set_attr "mode" "HI")])
-
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand")
 	(parity:DI (match_operand:DI 1 "register_operand")))]
@@ -14273,7 +14204,18 @@
   "TARGET_64BIT"
 {
   if (!TARGET_X32)
-    fputs (ASM_BYTE "0x66\n", asm_out_file);
+    /* The .loc directive has effect for 'the immediately following assembly
+       instruction'.  So for a sequence:
+         .loc f l
+         .byte x
+         insn1
+       the 'immediately following assembly instruction' is insn1.
+       We want to emit an insn prefix here, but if we use .byte (as shown in
+       'ELF Handling For Thread-Local Storage'), a preceding .loc will point
+       inside the insn sequence, rather than to the start.  After relaxation
+       of the sequence by the linker, the .loc might point inside an insn.
+       Use data16 prefix instead, which doesn't have this problem.  */
+    fputs ("\tdata16", asm_out_file);
   output_asm_insn
     ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
   if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
@@ -14681,6 +14623,20 @@
 ;; Gcc is slightly more smart about handling normal two address instructions
 ;; so use special patterns for add and mull.
 
+(define_insn "*fop_xf_comm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(match_operator:XF 3 "binary_fp_operator"
+			[(match_operand:XF 1 "register_operand" "%0")
+			 (match_operand:XF 2 "register_operand" "f")]))]
+  "TARGET_80387
+   && COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(if_then_else (match_operand:XF 3 "mult_operator")
+	   (const_string "fmul")
+	   (const_string "fop")))
+   (set_attr "mode" "XF")])
+
 (define_insn "*fop_<mode>_comm"
   [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
 	(match_operator:MODEF 3 "binary_fp_operator"
@@ -14716,17 +14672,33 @@
 	 (symbol_ref "false"))))])
 
 (define_insn "*rcpsf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
 		   UNSPEC_RCP))]
   "TARGET_SSE && TARGET_SSE_MATH"
-  "%vrcpss\t{%1, %d0|%d0, %1}"
+  "@
+   %vrcpss\t{%d1, %0|%0, %d1}
+   %vrcpss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
+(define_insn "*fop_xf_1_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+			[(match_operand:XF 1 "register_operand" "0,f")
+			 (match_operand:XF 2 "register_operand" "f,0")]))]
+  "TARGET_80387
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(if_then_else (match_operand:XF 3 "div_operator")
+	   (const_string "fdiv")
+	   (const_string "fop")))
+   (set_attr "mode" "XF")])
+
 (define_insn "*fop_<mode>_1"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
 	(match_operator:MODEF 3 "binary_fp_operator"
@@ -14763,49 +14735,65 @@
 	 (symbol_ref "true")
 	 (symbol_ref "false"))))])
 
-;; ??? Add SSE splitters for these!
-(define_insn "*fop_<MODEF:mode>_2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-	(match_operator:MODEF 3 "binary_fp_operator"
-	  [(float:MODEF
+(define_insn "*fop_<X87MODEF:mode>_2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(match_operator:X87MODEF 3 "binary_fp_operator"
+	  [(float:X87MODEF
 	     (match_operand:SWI24 1 "nonimmediate_operand" "m"))
-	   (match_operand:MODEF 2 "register_operand" "0")]))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
-   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+	   (match_operand:X87MODEF 2 "register_operand" "0")]))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
        || optimize_function_for_size_p (cfun))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:MODEF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:MODEF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+	(cond [(match_operand:X87MODEF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:X87MODEF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<SWI24:MODE>")])
 
-(define_insn "*fop_<MODEF:mode>_3_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-	(match_operator:MODEF 3 "binary_fp_operator"
-	  [(match_operand:MODEF 1 "register_operand" "0")
-	   (float:MODEF
+(define_insn "*fop_<X87MODEF:mode>_3_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(match_operator:X87MODEF 3 "binary_fp_operator"
+	  [(match_operand:X87MODEF 1 "register_operand" "0")
+	   (float:X87MODEF
 	     (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
-   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
        || optimize_function_for_size_p (cfun))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:MODEF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:MODEF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+	(cond [(match_operand:X87MODEF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:X87MODEF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*fop_xf_4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	   [(float_extend:XF
+	      (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+	    (match_operand:XF 2 "register_operand" "0,f")]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(cond [(match_operand:XF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:XF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*fop_df_4_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
 	(match_operator:DF 3 "binary_fp_operator"
@@ -14816,14 +14804,31 @@
    && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+	(cond [(match_operand:DF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:DF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
    (set_attr "mode" "SF")])
 
+(define_insn "*fop_xf_5_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(match_operand:XF 1 "register_operand" "0,f")
+	   (float_extend:XF
+	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(cond [(match_operand:XF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:XF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*fop_df_5_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
 	(match_operator:DF 3 "binary_fp_operator"
@@ -14834,14 +14839,32 @@
    && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+	(cond [(match_operand:DF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:DF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
    (set_attr "mode" "SF")])
 
+(define_insn "*fop_xf_6_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(float_extend:XF
+	     (match_operand:MODEF 1 "register_operand" "0,f"))
+	   (float_extend:XF
+	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(cond [(match_operand:XF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:XF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*fop_df_6_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
 	(match_operator:DF 3 "binary_fp_operator"
@@ -14853,131 +14876,13 @@
    && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+	(cond [(match_operand:DF 3 "mult_operator")
+		 (const_string "fmul")
+	       (match_operand:DF 3 "div_operator")
+		 (const_string "fdiv")
+	      ]
+	      (const_string "fop")))
    (set_attr "mode" "SF")])
-
-(define_insn "*fop_xf_comm_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(match_operator:XF 3 "binary_fp_operator"
-			[(match_operand:XF 1 "register_operand" "%0")
-			 (match_operand:XF 2 "register_operand" "f")]))]
-  "TARGET_80387
-   && COMMUTATIVE_ARITH_P (operands[3])"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (if_then_else (match_operand:XF 3 "mult_operator")
-           (const_string "fmul")
-           (const_string "fop")))
-   (set_attr "mode" "XF")])
-
-(define_insn "*fop_xf_1_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-	(match_operator:XF 3 "binary_fp_operator"
-			[(match_operand:XF 1 "register_operand" "0,f")
-			 (match_operand:XF 2 "register_operand" "f,0")]))]
-  "TARGET_80387
-   && !COMMUTATIVE_ARITH_P (operands[3])"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (if_then_else (match_operand:XF 3 "div_operator")
-           (const_string "fdiv")
-           (const_string "fop")))
-   (set_attr "mode" "XF")])
-
-(define_insn "*fop_xf_2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(match_operator:XF 3 "binary_fp_operator"
-	  [(float:XF
-	     (match_operand:SWI24 1 "nonimmediate_operand" "m"))
-	   (match_operand:XF 2 "register_operand" "0")]))]
-  "TARGET_80387
-   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_xf_3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(match_operator:XF 3 "binary_fp_operator"
-	  [(match_operand:XF 1 "register_operand" "0")
-	   (float:XF
-	     (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
-  "TARGET_80387
-   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_xf_4_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-	(match_operator:XF 3 "binary_fp_operator"
-	   [(float_extend:XF
-	      (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
-	    (match_operand:XF 2 "register_operand" "0,f")]))]
-  "TARGET_80387"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_xf_5_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-	(match_operator:XF 3 "binary_fp_operator"
-	  [(match_operand:XF 1 "register_operand" "0,f")
-	   (float_extend:XF
-	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
-  "TARGET_80387"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_xf_6_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-	(match_operator:XF 3 "binary_fp_operator"
-	  [(float_extend:XF
-	     (match_operand:MODEF 1 "register_operand" "0,f"))
-	   (float_extend:XF
-	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
-  "TARGET_80387"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "<MODE>")])
 
 ;; FPU special functions.
 
@@ -14985,7 +14890,7 @@
 ;; all fancy i386 XFmode math functions.
 
 (define_insn "truncxf<mode>2_i387_noop_unspec"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
+  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
 	(unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
 	UNSPEC_TRUNC_NOOP))]
   "TARGET_USE_FANCY_MATH_387"
@@ -15004,25 +14909,14 @@
    (set_attr "amdfam10_decode" "direct")
    (set_attr "bdver1_decode" "direct")])
 
-(define_insn "sqrt_extend<mode>xf2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(sqrt:XF
-	  (float_extend:XF
-	    (match_operand:MODEF 1 "register_operand" "0"))))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "direct")
-   (set_attr "amdfam10_decode" "direct")
-   (set_attr "bdver1_decode" "direct")])
-
 (define_insn "*rsqrtsf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
 		   UNSPEC_RSQRT))]
   "TARGET_SSE && TARGET_SSE_MATH"
-  "%vrsqrtss\t{%1, %d0|%d0, %1}"
+  "@
+   %vrsqrtss\t{%d1, %0|%0, %d1}
+   %vrsqrtss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
@@ -15040,11 +14934,13 @@
 })
 
 (define_insn "*sqrt<mode>2_sse"
-  [(set (match_operand:MODEF 0 "register_operand" "=v")
+  [(set (match_operand:MODEF 0 "register_operand" "=v,v")
 	(sqrt:MODEF
-	  (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
+	  (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "%vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
+  "@
+   %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
+   %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "sqrt")
    (set_attr "btver2_sse_attr" "sqrt")
@@ -15075,20 +14971,30 @@
   if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
     {
       rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = force_reg (<MODE>mode, operands[1]);
-
-      emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_sqrtxf2 (op0, op1));
       emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
       DONE;
    }
 })
 
+(define_insn "x86_fnstsw_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "SI")
+   (set_attr "unit" "i387")])
+
 (define_insn "fpremxf4_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
 		    (match_operand:XF 3 "register_operand" "1")]
 		   UNSPEC_FPREM_F))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
 	(unspec:XF [(match_dup 2) (match_dup 3)]
 		   UNSPEC_FPREM_U))
    (set (reg:CCFP FPSR_REG)
@@ -15163,7 +15069,7 @@
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
 		    (match_operand:XF 3 "register_operand" "1")]
 		   UNSPEC_FPREM1_F))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
 	(unspec:XF [(match_dup 2) (match_dup 3)]
 		   UNSPEC_FPREM1_U))
    (set (reg:CCFP FPSR_REG)
@@ -15242,7 +15148,7 @@
 	[(UNSPEC_SIN "sin")
 	 (UNSPEC_COS "cos")])
 
-(define_insn "*<sincos>xf2_i387"
+(define_insn "<sincos>xf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
 		   SINCOS))]
@@ -15253,31 +15159,29 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "*<sincos>_extend<mode>xf2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 1 "register_operand" "0"))]
-		   SINCOS))]
+(define_expand "<sincos><mode>2"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
+		      SINCOS))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
-  "f<sincos>"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
-;; When sincos pattern is defined, sin and cos builtin functions will be
-;; expanded to sincos pattern with one of its outputs left unused.
-;; CSE pass will figure out if two sincos patterns can be combined,
-;; otherwise sincos pattern will be split back to sin or cos pattern,
-;; depending on the unused output.
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_<sincos>xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
 
 (define_insn "sincosxf3"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
 		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
@@ -15286,70 +15190,10 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_split
-  [(set (match_operand:XF 0 "register_operand")
-	(unspec:XF [(match_operand:XF 2 "register_operand")]
-		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand")
-	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
-  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
-   && can_create_pseudo_p ()"
-  [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))])
-
-(define_split
-  [(set (match_operand:XF 0 "register_operand")
-	(unspec:XF [(match_operand:XF 2 "register_operand")]
-		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand")
-	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
-  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
-   && can_create_pseudo_p ()"
-  [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))])
-
-(define_insn "sincos_extend<mode>xf3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 2 "register_operand" "0"))]
-		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand" "=u")
-        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "fsincos"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
-(define_split
-  [(set (match_operand:XF 0 "register_operand")
-	(unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 2 "register_operand"))]
-		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand")
-	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
-  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
-   && can_create_pseudo_p ()"
-  [(set (match_dup 1)
-	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))])
-
-(define_split
-  [(set (match_operand:XF 0 "register_operand")
-	(unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 2 "register_operand"))]
-		   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand")
-	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
-  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
-   && can_create_pseudo_p ()"
-  [(set (match_dup 0)
-	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))])
-
 (define_expand "sincos<mode>3"
   [(use (match_operand:MODEF 0 "register_operand"))
    (use (match_operand:MODEF 1 "register_operand"))
-   (use (match_operand:MODEF 2 "register_operand"))]
+   (use (match_operand:MODEF 2 "general_operand"))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
@@ -15357,39 +15201,23 @@
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1));
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_sincosxf3 (op0, op1, op2));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[1], op1));
   DONE;
 })
 
 (define_insn "fptanxf4_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(match_operand:XF 3 "const_double_operand" "F"))
-   (set (match_operand:XF 1 "register_operand" "=u")
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(match_operand:SF 3 "const1_operand"))
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
 		   UNSPEC_TAN))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
-   && standard_80387_constant_p (operands[3]) == 2"
-  "fptan"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
-(define_insn "fptan_extend<mode>xf4_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-	(match_operand:MODEF 3 "const_double_operand" "F"))
-   (set (match_operand:XF 1 "register_operand" "=u")
-        (unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 2 "register_operand" "0"))]
-		   UNSPEC_TAN))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations
-   && standard_80387_constant_p (operands[3]) == 2"
+   && flag_unsafe_math_optimizations"
   "fptan"
   [(set_attr "type" "fpspc")
    (set_attr "znver1_decode" "vector")
@@ -15401,36 +15229,33 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx one = gen_reg_rtx (XFmode);
-  rtx op2 = CONST1_RTX (XFmode); /* fld1 */
-
-  emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
+  rtx one = gen_reg_rtx (SFmode);
+  emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1],
+				CONST1_RTX (SFmode)));
   DONE;
 })
 
 (define_expand "tan<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  rtx one = gen_reg_rtx (<MODE>mode);
-  rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
-
-  emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
-					     operands[1], op2));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
-  DONE;
-})
-
-(define_insn "*fpatanxf3_i387"
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* widen operand 1 */
+  emit_insn (gen_tanxf2 (op0, op1));  /* compute in XFmode */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
+  DONE;
+})
+
+(define_insn "atan2xf3"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-	            (match_operand:XF 2 "register_operand" "u")]
+	            (match_operand:XF 2 "register_operand" "f")]
 	           UNSPEC_FPATAN))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
@@ -15440,45 +15265,24 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "fpatan_extend<mode>xf3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-        (unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 1 "register_operand" "0"))
-		    (float_extend:XF
-		      (match_operand:MODEF 2 "register_operand" "u"))]
-	           UNSPEC_FPATAN))
-   (clobber (match_scratch:XF 3 "=2"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "fpatan"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
-(define_expand "atan2xf3"
-  [(parallel [(set (match_operand:XF 0 "register_operand")
-		   (unspec:XF [(match_operand:XF 2 "register_operand")
-			       (match_operand:XF 1 "register_operand")]
-			      UNSPEC_FPATAN))
-	      (clobber (match_scratch:XF 3))])]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations")
-
 (define_expand "atan2<mode>3"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))
-   (use (match_operand:MODEF 2 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:MODEF 2 "general_operand"))]	; both inputs relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op1 = gen_reg_rtx (XFmode);  /* operand 1 widened to XFmode */
+  rtx op2 = gen_reg_rtx (XFmode);  /* operand 2 widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  emit_insn (gen_atan2xf3 (op0, op2, op1));  /* note order: op2 first, then op1, as in the removed call */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
@@ -15490,26 +15294,22 @@
 	      (clobber (match_scratch:XF 3))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  operands[2] = gen_reg_rtx (XFmode);
-  emit_move_insn (operands[2], CONST1_RTX (XFmode));  /* fld1 */
-})
+  "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
 
 (define_expand "atan<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  rtx op2 = gen_reg_rtx (<MODE>mode);
-  emit_move_insn (op2, CONST1_RTX (<MODE>mode));  /* fld1 */
-
-  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* widen operand 1 */
+  emit_insn (gen_atanxf2 (op0, op1));  /* the constant 1.0 is no longer built here */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
@@ -15531,7 +15331,7 @@
   for (i = 2; i < 6; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
-  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));
 })
 
 (define_expand "asin<mode>2"
@@ -15547,7 +15347,7 @@
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_asinxf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -15569,7 +15369,7 @@
   for (i = 2; i < 6; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
-  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));
 })
 
 (define_expand "acos<mode>2"
@@ -15585,14 +15385,14 @@
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_acosxf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
 (define_insn "fyl2xxf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-		    (match_operand:XF 2 "register_operand" "u")]
+		    (match_operand:XF 2 "register_operand" "f")]
 	           UNSPEC_FYL2X))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
@@ -15602,22 +15402,6 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "fyl2x_extend<mode>xf3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-        (unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 1 "register_operand" "0"))
-		    (match_operand:XF 2 "register_operand" "u")]
-	           UNSPEC_FYL2X))
-   (clobber (match_scratch:XF 3 "=2"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "fyl2x"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
 (define_expand "logxf2"
   [(parallel [(set (match_operand:XF 0 "register_operand")
 		   (unspec:XF [(match_operand:XF 1 "register_operand")
@@ -15626,25 +15410,24 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  operands[2] = gen_reg_rtx (XFmode);
-  emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+  operands[2]
+    = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
 })
 
 (define_expand "log<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  rtx op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
-
-  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* widen operand 1 */
+  emit_insn (gen_logxf2 (op0, op1));  /* logxf2 loads the fldln2 constant itself */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
@@ -15656,25 +15439,24 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  operands[2] = gen_reg_rtx (XFmode);
-  emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+  operands[2]
+    = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */
 })
 
 (define_expand "log10<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  rtx op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
-
-  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* widen operand 1 */
+  emit_insn (gen_log10xf2 (op0, op1));  /* the fldlg2 constant is no longer built here */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
@@ -15685,33 +15467,29 @@
 	      (clobber (match_scratch:XF 3))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  operands[2] = gen_reg_rtx (XFmode);
-  emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
-})
+  "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
 
 (define_expand "log2<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
-
-  rtx op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
-
-  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* widen operand 1 */
+  emit_insn (gen_log2xf2 (op0, op1));  /* the constant 1.0 is no longer built here */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
 (define_insn "fyl2xp1xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-		    (match_operand:XF 2 "register_operand" "u")]
+		    (match_operand:XF 2 "register_operand" "f")]
 	           UNSPEC_FYL2XP1))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
@@ -15721,22 +15499,6 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "fyl2xp1_extend<mode>xf3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-        (unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 1 "register_operand" "0"))
-		    (match_operand:XF 2 "register_operand" "u")]
-	           UNSPEC_FYL2XP1))
-   (clobber (match_scratch:XF 3 "=2"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "fyl2xp1"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
 (define_expand "log1pxf2"
   [(use (match_operand:XF 0 "register_operand"))
    (use (match_operand:XF 1 "register_operand"))]
@@ -15749,20 +15511,18 @@
 
 (define_expand "log1p<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0;
-
-  op0 = gen_reg_rtx (XFmode);
-
-  operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
-
-  ix86_emit_i387_log1p (op0, operands[1]);
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  rtx op0 = gen_reg_rtx (XFmode);  /* XFmode result */
+  rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* explicit extend insn replaces the FLOAT_EXTEND rtx */
+  emit_insn (gen_log1pxf2 (op0, op1));  /* goes through log1pxf2 instead of calling ix86_emit_i387_log1p here */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow result into operand 0 */
   DONE;
 })
 
@@ -15770,7 +15530,7 @@
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
 		   UNSPEC_XTRACT_FRACT))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
@@ -15779,22 +15539,6 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "fxtract_extend<mode>xf3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(unspec:XF [(float_extend:XF
-		      (match_operand:MODEF 2 "register_operand" "0"))]
-		   UNSPEC_XTRACT_FRACT))
-   (set (match_operand:XF 1 "register_operand" "=u")
-        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "fxtract"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "XF")])
-
 (define_expand "logbxf2"
   [(parallel [(set (match_dup 2)
 		   (unspec:XF [(match_operand:XF 1 "register_operand")]
@@ -15807,7 +15551,7 @@
 
 (define_expand "logb<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
@@ -15816,8 +15560,9 @@
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));  /* op1 = input widened to XFmode */
+  emit_insn (gen_logbxf2 (op0, op1));  /* op0 = logb result; op1 is now only the input */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));  /* narrow the result (op0), not the input (op1) */
   DONE;
 })
 
@@ -15842,21 +15587,23 @@
 
 (define_expand "ilogb<mode>2"
   [(use (match_operand:SI 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]	; predicate relaxed from register_operand
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
+  rtx op0, op1, op2;  /* op2 holds the widened input */
 
   if (optimize_insn_for_size_p ())
     FAIL;
 
   op0 = gen_reg_rtx (XFmode);
   op1 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[1]));  /* widen operand 1 to XFmode */
+  emit_insn (gen_fxtractxf3_i387 (op0, op1, op2));  /* presumably op0 = fraction, op1 = exponent -- confirm against the insn */
   emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
   DONE;
 })
@@ -15877,7 +15624,7 @@
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
 		    (match_operand:XF 3 "register_operand" "1")]
 		   UNSPEC_FSCALE_FRACT))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
 	(unspec:XF [(match_dup 2) (match_dup 3)]
 		   UNSPEC_FSCALE_EXP))]
   "TARGET_USE_FANCY_MATH_387
@@ -15908,7 +15655,7 @@
   for (i = 3; i < 10; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
-  emit_move_insn (operands[7], CONST1_RTX (XFmode));  /* fld1 */
+  emit_move_insn (operands[7], CONST1_RTX (XFmode));
 })
 
 (define_expand "expxf2"
@@ -15917,10 +15664,7 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx op2;
-
-  op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
+  rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
   DONE;
@@ -15934,14 +15678,12 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_expxf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -15951,10 +15693,7 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx op2;
-
-  op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
+  rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
   DONE;
@@ -15968,14 +15707,12 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_exp10xf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -15985,10 +15722,7 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx op2;
-
-  op2 = gen_reg_rtx (XFmode);
-  emit_move_insn (op2, CONST1_RTX (XFmode));  /* fld1 */
+  rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode));
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
   DONE;
@@ -16002,14 +15736,12 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_exp2xf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -16018,7 +15750,6 @@
 			       (match_dup 2)))
    (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
    (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
-   (set (match_dup 9) (float_extend:XF (match_dup 13)))
    (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
    (parallel [(set (match_dup 7)
 		   (unspec:XF [(match_dup 6) (match_dup 4)]
@@ -16032,8 +15763,7 @@
 	      (set (match_dup 11)
 		   (unspec:XF [(match_dup 9) (match_dup 8)]
 			      UNSPEC_FSCALE_EXP))])
-   (set (match_dup 12) (minus:XF (match_dup 10)
-				 (float_extend:XF (match_dup 13))))
+   (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
    (set (match_operand:XF 0 "register_operand")
 	(plus:XF (match_dup 12) (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
@@ -16044,10 +15774,8 @@
   for (i = 2; i < 13; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
-  operands[13]
-    = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */
-
   emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+  emit_move_insn (operands[9], CONST1_RTX (XFmode));
 })
 
 (define_expand "expm1<mode>2"
@@ -16058,14 +15786,12 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_expm1xf2 (op0, op1));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -16076,10 +15802,8 @@
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx tmp1, tmp2;
-
-  tmp1 = gen_reg_rtx (XFmode);
-  tmp2 = gen_reg_rtx (XFmode);
+  rtx tmp1 = gen_reg_rtx (XFmode);
+  rtx tmp2 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_floatsixf2 (tmp1, operands[2]));
   emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
@@ -16096,14 +15820,12 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -16117,9 +15839,7 @@
 			      UNSPEC_FSCALE_EXP))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  operands[3] = gen_reg_rtx (XFmode);
-})
+  "operands[3] = gen_reg_rtx (XFmode);")
 
 (define_expand "scalb<mode>3"
   [(use (match_operand:MODEF 0 "register_operand"))
@@ -16130,16 +15850,14 @@
        || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
-  rtx op0, op1, op2;
-
-  op0 = gen_reg_rtx (XFmode);
-  op1 = gen_reg_rtx (XFmode);
-  op2 = gen_reg_rtx (XFmode);
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
   emit_insn (gen_scalbxf3 (op0, op1, op2));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
@@ -16155,7 +15873,7 @@
 
 (define_expand "significand<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
+   (use (match_operand:MODEF 1 "general_operand"))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
@@ -16164,15 +15882,16 @@
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_significandxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
   DONE;
 })
 
 
 (define_insn "sse4_1_round<mode>2"
   [(set (match_operand:MODEF 0 "register_operand" "=x,v")
-	(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x,v")
+	(unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm")
 		       (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
 		      UNSPEC_ROUND))]
   "TARGET_SSE4_1"
@@ -16196,22 +15915,10 @@
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
-(define_insn "rint<mode>2_frndint"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-	(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "0")]
-		      UNSPEC_FRNDINT))]
-  "TARGET_USE_FANCY_MATH_387"
-  "frndint"
-  [(set_attr "type" "fpspc")
-   (set_attr "znver1_decode" "vector")
-   (set_attr "mode" "<MODE>")])
-
 (define_expand "rint<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
-  "(TARGET_USE_FANCY_MATH_387
-    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-	|| TARGET_MIX_SSE_I387))
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "TARGET_USE_FANCY_MATH_387
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
@@ -16223,7 +15930,46 @@
 	ix86_expand_rint (operands[0], operands[1]);
     }
   else
-    emit_insn (gen_rint<mode>2_frndint (operands[0], operands[1]));
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_rintxf2 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_expand "nearbyintxf2"	; XFmode nearbyint as a bare UNSPEC_FRNDINT set
+  [(set (match_operand:XF 0 "register_operand")
+	(unspec:XF [(match_operand:XF 1 "register_operand")]
+		   UNSPEC_FRNDINT))]	; enabled only when !flag_trapping_math (condition below)
+  "TARGET_USE_FANCY_MATH_387
+   && !flag_trapping_math")
+
+(define_expand "nearbyint<mode>2"	; MODEF nearbyint: SSE4.1 round insn or x87 fallback
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	  || TARGET_MIX_SSE_I387)
+    && !flag_trapping_math)
+   || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
+{
+  if (TARGET_SSE4_1 && TARGET_SSE_MATH)  /* SSE path is checked first */
+    emit_insn (gen_sse4_1_round<mode>2
+	       (operands[0], operands[1], GEN_INT (ROUND_MXCSR
+						   | ROUND_NO_EXC)));  /* presumably: current rounding mode, no exception -- confirm */
+  else  /* x87 path: compute in XFmode */
+    {
+      rtx op0 = gen_reg_rtx (XFmode);  /* XFmode result */
+      rtx op1 = gen_reg_rtx (XFmode);  /* input widened to XFmode */
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_nearbyintxf2 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));  /* uses the noop_unspec truncate variant, unlike most sibling expanders */
+    }
   DONE;
 })
 
@@ -16233,7 +15979,8 @@
   "(TARGET_USE_FANCY_MATH_387
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
 	|| TARGET_MIX_SSE_I387)
-    && flag_unsafe_math_optimizations)
+    && flag_unsafe_math_optimizations
+    && (flag_fp_int_builtin_inexact || !flag_trapping_math))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
        && !flag_trapping_math && !flag_rounding_math)"
 {
@@ -16258,91 +16005,18 @@
   DONE;
 })
 
-(define_insn_and_split "*fistdi2_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand")
-	(unspec:DI [(match_operand:XF 1 "register_operand")]
-		   UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fistdi2 (operands[0], operands[1]));
-  else
-    {
-      operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
-      emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
-					 operands[2]));
-    }
-  DONE;
-}
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "DI")])
-
-(define_insn "fistdi2"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
+(define_insn "lrintxfdi2"	; renamed from fistdi2
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")	; predicate is wider than the memory-only constraint
 	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
 		   UNSPEC_FIST))
-   (clobber (match_scratch:XF 2 "=&1f"))]
+   (clobber (match_scratch:XF 2 "=&f"))]	; early-clobber scratch; the "1" alternative was dropped
   "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, false);"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "DI")])
 
-(define_insn "fistdi2_with_temp"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
-	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
-		   UNSPEC_FIST))
-   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
-   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
-  "TARGET_USE_FANCY_MATH_387"
-  "#"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "DI")])
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(unspec:DI [(match_operand:XF 1 "register_operand")]
-		   UNSPEC_FIST))
-   (clobber (match_operand:DI 2 "memory_operand"))
-   (clobber (match_scratch 3))]
-  "reload_completed"
-  [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
-	      (clobber (match_dup 3))])
-   (set (match_dup 0) (match_dup 2))])
-
-(define_split
-  [(set (match_operand:DI 0 "memory_operand")
-	(unspec:DI [(match_operand:XF 1 "register_operand")]
-		   UNSPEC_FIST))
-   (clobber (match_operand:DI 2 "memory_operand"))
-   (clobber (match_scratch 3))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
-	      (clobber (match_dup 3))])])
-
-(define_insn_and_split "*fist<mode>2_1"
-  [(set (match_operand:SWI24 0 "register_operand")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand")]
-		      UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-  emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
-					operands[2]));
-  DONE;
-}
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "fist<mode>2"
-  [(set (match_operand:SWI24 0 "memory_operand" "=m")
+(define_insn "lrintxf<mode>2"
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
 	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
 		      UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387"
@@ -16350,39 +16024,6 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fist<mode>2_with_temp"
-  [(set (match_operand:SWI24 0 "register_operand" "=r")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
-		      UNSPEC_FIST))
-   (clobber (match_operand:SWI24 2 "memory_operand" "=m"))]
-  "TARGET_USE_FANCY_MATH_387"
-  "#"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "<MODE>")])
-
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand")]
-		      UNSPEC_FIST))
-   (clobber (match_operand:SWI24 2 "memory_operand"))]
-  "reload_completed"
-  [(set (match_dup 2) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST))
-   (set (match_dup 0) (match_dup 2))])
-
-(define_split
-  [(set (match_operand:SWI24 0 "memory_operand")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand")]
-		      UNSPEC_FIST))
-   (clobber (match_operand:SWI24 2 "memory_operand"))]
-  "reload_completed"
-  [(set (match_dup 0) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST))])
-
-(define_expand "lrintxf<mode>2"
-  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
-     (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
-		     UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387")
-
 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
   [(set (match_operand:SWI48 0 "nonimmediate_operand")
      (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
@@ -16446,9 +16087,9 @@
 	 (UNSPEC_FIST_CEIL "CEIL")])
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
-(define_insn_and_split "frndint<mode>2_<rounding>"
-  [(set (match_operand:X87MODEF 0 "register_operand")
-	(unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand")]
+(define_insn_and_split "frndintxf2_<rounding>"
+  [(set (match_operand:XF 0 "register_operand")
+	(unspec:XF [(match_operand:XF 1 "register_operand")]
 		   FRNDINT_ROUNDING))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
@@ -16463,18 +16104,18 @@
   operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
   operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
 
-  emit_insn (gen_frndint<mode>2_<rounding>_i387 (operands[0], operands[1],
-						 operands[2], operands[3]));
+  emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
+					     operands[2], operands[3]));
   DONE;
 }
   [(set_attr "type" "frndint")
    (set_attr "i387_cw" "<rounding>")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "frndint<mode>2_<rounding>_i387"
-  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
-	(unspec:X87MODEF [(match_operand:X87MODEF 1 "register_operand" "0")]
-			 FRNDINT_ROUNDING))
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_<rounding>_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   FRNDINT_ROUNDING))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))]
   "TARGET_USE_FANCY_MATH_387
@@ -16482,7 +16123,7 @@
   "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
   [(set_attr "type" "frndint")
    (set_attr "i387_cw" "<rounding>")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "XF")])
 
 (define_expand "<rounding_insn>xf2"
   [(parallel [(set (match_operand:XF 0 "register_operand")
@@ -16502,11 +16143,11 @@
 	|| TARGET_MIX_SSE_I387)
     && (flag_fp_int_builtin_inexact || !flag_trapping_math))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && (TARGET_SSE4_1 || !flag_trapping_math
-	   || flag_fp_int_builtin_inexact))"
+       && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact
+	   || !flag_trapping_math))"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-      && (TARGET_SSE4_1 || !flag_trapping_math || flag_fp_int_builtin_inexact))
+      && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
     {
       if (TARGET_SSE4_1)
 	emit_insn (gen_sse4_1_round<mode>2
@@ -16536,72 +16177,14 @@
 	}
     }
   else
-    emit_insn (gen_frndint<mode>2_<rounding> (operands[0], operands[1]));
-  DONE;
-})
-
-;; Rounding mode control word calculation could clobber FLAGS_REG.
-(define_insn_and_split "frndintxf2_mask_pm"
-  [(set (match_operand:XF 0 "register_operand")
-	(unspec:XF [(match_operand:XF 1 "register_operand")]
-		   UNSPEC_FRNDINT_MASK_PM))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
-
-  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
-  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
-
-  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
-					  operands[2], operands[3]));
-  DONE;
-}
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_insn "frndintxf2_mask_pm_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-		   UNSPEC_FRNDINT_MASK_PM))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_expand "nearbyintxf2"
-  [(parallel [(set (match_operand:XF 0 "register_operand")
-		   (unspec:XF [(match_operand:XF 1 "register_operand")]
-			      UNSPEC_FRNDINT_MASK_PM))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations")
-
-(define_expand "nearbyint<mode>2"
-  [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
-
-  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_<rounding> (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+    }
   DONE;
 })
 
@@ -16622,16 +16205,9 @@
 
   operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
   operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
-					   operands[2], operands[3]));
-  else
-    {
-      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-      emit_insn (gen_fist<mode>2_<rounding>_with_temp
-		  (operands[0], operands[1], operands[2],
-		   operands[3], operands[4]));
-    }
+
+  emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
+					 operands[2], operands[3]));
   DONE;
 }
   [(set_attr "type" "fistp")
@@ -16639,12 +16215,12 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn "fistdi2_<rounding>"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
 	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
 		   FIST_ROUNDING))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))
-   (clobber (match_scratch:XF 4 "=&1f"))]
+   (clobber (match_scratch:XF 4 "=&f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "* return output_fix_trunc (insn, operands, false);"
@@ -16652,54 +16228,8 @@
    (set_attr "i387_cw" "<rounding>")
    (set_attr "mode" "DI")])
 
-(define_insn "fistdi2_<rounding>_with_temp"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
-	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
-		   FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
-   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "<rounding>")
-   (set_attr "mode" "DI")])
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(unspec:DI [(match_operand:XF 1 "register_operand")]
-		   FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:DI 4 "memory_operand"))
-   (clobber (match_scratch 5))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4)
-		   (unspec:DI [(match_dup 1)] FIST_ROUNDING))
-	      (use (match_dup 2))
-	      (use (match_dup 3))
-	      (clobber (match_dup 5))])
-   (set (match_dup 0) (match_dup 4))])
-
-(define_split
-  [(set (match_operand:DI 0 "memory_operand")
-	(unspec:DI [(match_operand:XF 1 "register_operand")]
-		   FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:DI 4 "memory_operand"))
-   (clobber (match_scratch 5))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0)
-		   (unspec:DI [(match_dup 1)] FIST_ROUNDING))
-	      (use (match_dup 2))
-	      (use (match_dup 3))
-	      (clobber (match_dup 5))])])
-
 (define_insn "fist<mode>2_<rounding>"
-  [(set (match_operand:SWI24 0 "memory_operand" "=m")
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
 	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
 		      FIST_ROUNDING))
    (use (match_operand:HI 2 "memory_operand" "m"))
@@ -16711,47 +16241,6 @@
    (set_attr "i387_cw" "<rounding>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fist<mode>2_<rounding>_with_temp"
-  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f,f")]
-		      FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "<rounding>")
-   (set_attr "mode" "<MODE>")])
-
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand")]
-		      FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:SWI24 4 "memory_operand"))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4)
-		   (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING))
-	      (use (match_dup 2))
-	      (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))])
-
-(define_split
-  [(set (match_operand:SWI24 0 "memory_operand")
-	(unspec:SWI24 [(match_operand:XF 1 "register_operand")]
-		      FIST_ROUNDING))
-   (use (match_operand:HI 2 "memory_operand"))
-   (use (match_operand:HI 3 "memory_operand"))
-   (clobber (match_operand:SWI24 4 "memory_operand"))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0)
-		   (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING))
-	      (use (match_dup 2))
-	      (use (match_dup 3))])])
-
 (define_expand "l<rounding_insn>xf<mode>2"
   [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
 		   (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
@@ -16767,12 +16256,19 @@
 				 FIST_ROUNDING))
 	      (clobber (reg:CC FLAGS_REG))])]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
-   && !flag_trapping_math"
-{
-  if (TARGET_64BIT && optimize_insn_for_size_p ())
-    FAIL;
-
-  if (ROUND_<ROUNDING> == ROUND_FLOOR)
+   && (TARGET_SSE4_1 || !flag_trapping_math)"
+{
+  if (TARGET_SSE4_1)
+    {
+      rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
+
+      emit_insn (gen_sse4_1_round<mode>2
+		 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
+					     | ROUND_NO_EXC)));
+      emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
+		 (operands[0], tmp));
+    }
+  else if (ROUND_<ROUNDING> == ROUND_FLOOR)
     ix86_expand_lfloorceil (operands[0], operands[1], true);
   else if (ROUND_<ROUNDING> == ROUND_CEIL)
     ix86_expand_lfloorceil (operands[0], operands[1], false);
@@ -16794,81 +16290,6 @@
    (set_attr "unit" "i387")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "fxam<mode>2_i387_with_temp"
-  [(set (match_operand:HI 0 "register_operand")
-	(unspec:HI
-	  [(match_operand:MODEF 1 "memory_operand")]
-	  UNSPEC_FXAM_MEM))]
-  "TARGET_USE_FANCY_MATH_387
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(set (match_dup 2)(match_dup 1))
-   (set (match_dup 0)
-	(unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
-{
-  operands[2] = gen_reg_rtx (<MODE>mode);
-
-  MEM_VOLATILE_P (operands[1]) = 1;
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_expand "isinfxf2"
-  [(use (match_operand:SI 0 "register_operand"))
-   (use (match_operand:XF 1 "register_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && ix86_libc_has_function (function_c99_misc)"
-{
-  rtx mask = GEN_INT (0x45);
-  rtx val = GEN_INT (0x05);
-
-  rtx scratch = gen_reg_rtx (HImode);
-  rtx res = gen_reg_rtx (QImode);
-
-  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
-
-  emit_insn (gen_andqi_ext_1 (scratch, scratch, mask));
-  emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  ix86_expand_setcc (res, EQ,
-		     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
-  emit_insn (gen_zero_extendqisi2 (operands[0], res));
-  DONE;
-})
-
-(define_expand "isinf<mode>2"
-  [(use (match_operand:SI 0 "register_operand"))
-   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && ix86_libc_has_function (function_c99_misc)
-   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  rtx mask = GEN_INT (0x45);
-  rtx val = GEN_INT (0x05);
-
-  rtx scratch = gen_reg_rtx (HImode);
-  rtx res = gen_reg_rtx (QImode);
-
-  /* Remove excess precision by forcing value through memory. */
-  if (memory_operand (operands[1], VOIDmode))
-    emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
-  else
-    {
-      rtx temp = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-
-      emit_move_insn (temp, operands[1]);
-      emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
-    }
-
-  emit_insn (gen_andqi_ext_1 (scratch, scratch, mask));
-  emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  ix86_expand_setcc (res, EQ,
-		     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
-  emit_insn (gen_zero_extendqisi2 (operands[0], res));
-  DONE;
-})
-
 (define_expand "signbittf2"
   [(use (match_operand:SI 0 "register_operand"))
    (use (match_operand:TF 1 "register_operand"))]
@@ -17721,14 +17142,9 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "sbb{<imodesuffix>}\t%0, %0"
-  ; Since we don't have the proper number of operands for an alu insn,
-  ; fill in all the blanks.
-  [(set_attr "type" "alu")
-   (set_attr "modrm_class" "op0")
+  [(set_attr "type" "alu1")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
-   (set_attr "memory" "none")
-   (set_attr "imm_disp" "false")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
@@ -17741,12 +17157,9 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "sbb{<imodesuffix>}\t%0, %0"
-  [(set_attr "type" "alu")
-   (set_attr "modrm_class" "op0")
+  [(set_attr "type" "alu1")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
-   (set_attr "memory" "none")
-   (set_attr "imm_disp" "false")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
@@ -17757,12 +17170,9 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "sbb{<imodesuffix>}\t%0, %0"
-  [(set_attr "type" "alu")
-   (set_attr "modrm_class" "op0")
+  [(set_attr "type" "alu1")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
-   (set_attr "memory" "none")
-   (set_attr "imm_disp" "false")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
@@ -18308,28 +17718,6 @@
   "* return output_probe_stack_range (operands[0], operands[2]);"
   [(set_attr "type" "multi")])
 
-/* Additional processing for builtin_setjmp.  Store the shadow stack pointer
-   as a forth element in jmpbuf.  */
-(define_expand "builtin_setjmp_setup"
-  [(match_operand 0 "address_operand")]
-  "TARGET_SHSTK"
-{
-  if (flag_cf_protection & CF_RETURN)
-    {
-      rtx mem, reg_ssp;
-
-      mem = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0],
-					       3 * GET_MODE_SIZE (Pmode)));
-      reg_ssp = gen_reg_rtx (Pmode);
-      emit_insn (gen_rtx_SET (reg_ssp, const0_rtx));
-      emit_insn ((Pmode == SImode)
-		  ? gen_rdsspsi (reg_ssp, reg_ssp)
-		  : gen_rdsspdi (reg_ssp, reg_ssp));
-      emit_move_insn (mem, reg_ssp);
-    }
-  DONE;
-})
-
 (define_expand "builtin_setjmp_receiver"
   [(label_ref (match_operand 0))]
   "!TARGET_64BIT && flag_pic"
@@ -18350,80 +17738,141 @@
   DONE;
 })
 
-(define_expand "builtin_longjmp"
-  [(match_operand 0 "address_operand")]
-  "TARGET_SHSTK"
-{
-  rtx fp, lab, stack;
-  rtx jump, label, reg_adj, reg_ssp, reg_minus, mem_buf, tmp, clob;
-  machine_mode sa_mode = STACK_SAVEAREA_MODE (SAVE_NONLOCAL);
-
-  /* Adjust the shadow stack pointer (ssp) to the value saved in the
-     jmp_buf.  The saving was done in the builtin_setjmp_setup.  */
-  if (flag_cf_protection & CF_RETURN)
-    {
-      /* Get current shadow stack pointer.  The code below will check if
-	 SHSTK feature is enabled.  If it's not enabled RDSSP instruction
+;; Store the stack pointer (operand 1) into the nonlocal save area
+;; (operand 0).  When flag_cf_protection has CF_RETURN set, the first
+;; word of the save area receives the shadow stack pointer read with
+;; RDSSP and the stack pointer goes UNITS_PER_WORD bytes further in;
+;; otherwise the stack pointer is stored at offset 0.
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+        (match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot;
+  if ((flag_cf_protection & CF_RETURN))
+    {
+      /* Copy shadow stack pointer to the first slot and stack pointer
+	 to the second slot.  */
+      rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
+      stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
+      rtx ssp = gen_reg_rtx (word_mode);
+      emit_insn ((word_mode == SImode)
+		 ? gen_rdsspsi (ssp)
+		 : gen_rdsspdi (ssp));
+      emit_move_insn (ssp_slot, ssp);
+    }
+  else
+    stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);
+  DONE;
+})
+
+;; Reload the stack pointer (operand 0) from the nonlocal save area
+;; (operand 1), using the same slot layout as "save_stack_nonlocal".
+;; When flag_cf_protection has CF_RETURN set, the shadow stack pointer
+;; is adjusted first: the saved value is subtracted from the current one
+;; and, if the result is non-zero, it is negated and shifted right by 2
+;; or 3 (by word_mode) to give a frame count.  While that count is above
+;; 255, INCSSP is issued with 255 in a loop; a final INCSSP handles the
+;; remainder.
+;; NOTE(review): 255 is presumably the largest count a single INCSSP
+;; honours -- confirm against the ISA documentation.
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+	(match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot;
+  if ((flag_cf_protection & CF_RETURN))
+    {
+      /* Restore shadow stack pointer from the first slot and stack
+	 pointer from the second slot.  */
+      rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
+      stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
+
+      rtx flags, jump, noadj_label, inc_label, loop_label;
+      rtx reg_adj, reg_ssp, tmp, clob;
+
+      /* Get the current shadow stack pointer.  The code below will check if
+	 SHSTK feature is enabled.  If it is not enabled the RDSSP instruction
 	 is a NOP.  */
-      reg_ssp = gen_reg_rtx (Pmode);
+      reg_ssp = gen_reg_rtx (word_mode);
       emit_insn (gen_rtx_SET (reg_ssp, const0_rtx));
-      emit_insn ((Pmode == SImode)
-		 ? gen_rdsspsi (reg_ssp, reg_ssp)
-		 : gen_rdsspdi (reg_ssp, reg_ssp));
-      mem_buf = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0],
-						   3 * GET_MODE_SIZE (Pmode))),
+      emit_insn ((word_mode == SImode)
+		 ? gen_rdsspsi (reg_ssp)
+		 : gen_rdsspdi (reg_ssp));
 
       /* Compare through subtraction the saved and the current ssp to decide
 	 if ssp has to be adjusted.  */
-      reg_minus = gen_reg_rtx (Pmode);
-      tmp = gen_rtx_SET (reg_minus, gen_rtx_MINUS (Pmode, reg_ssp, mem_buf));
+      tmp = gen_rtx_SET (reg_ssp, gen_rtx_MINUS (word_mode, reg_ssp,
+						 ssp_slot));
       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
       emit_insn (tmp);
 
-      /* Jump over adjustment code.  */
-      label = gen_label_rtx ();
-      tmp = gen_rtx_REG (CCmode, FLAGS_REG);
-      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+      /* Compare and jump over adjustment code.  */
+      noadj_label = gen_label_rtx ();
+      flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-				  gen_rtx_LABEL_REF (VOIDmode, label),
+				  gen_rtx_LABEL_REF (VOIDmode, noadj_label),
 				  pc_rtx);
       jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
-      JUMP_LABEL (jump) = label;
-
-      /* Adjust the ssp.  */
-      reg_adj = gen_reg_rtx (Pmode);
+      JUMP_LABEL (jump) = noadj_label;
+
+      /* Compute the number of frames to adjust.  */
+      reg_adj = gen_lowpart (ptr_mode, reg_ssp);
       tmp = gen_rtx_SET (reg_adj,
-			 gen_rtx_LSHIFTRT (Pmode, negate_rtx (Pmode, reg_minus),
-					   GEN_INT (3)));
+			 gen_rtx_LSHIFTRT (ptr_mode,
+					   negate_rtx (ptr_mode, reg_adj),
+					   GEN_INT ((word_mode == SImode)
+						    ? 2
+						    : 3)));
       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
       emit_insn (tmp);
-      emit_insn ((Pmode == SImode)
-		 ? gen_incsspsi (reg_adj)
-		 : gen_incsspdi (reg_adj));
-
-      emit_label (label);
-      LABEL_NUSES (label) = 1;
-    }
-
-  /* This code is the same as in expand_buildin_longjmp.  */
-  fp = gen_rtx_MEM (Pmode, operands[0]);
-  lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0],
-					   GET_MODE_SIZE (Pmode)));
-  stack = gen_rtx_MEM (sa_mode, plus_constant (Pmode, operands[0],
-					       2 * GET_MODE_SIZE (Pmode)));
-  lab = copy_to_reg (lab);
-
-  emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
-  emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
-
-  emit_move_insn (hard_frame_pointer_rtx, fp);
-  emit_stack_restore (SAVE_NONLOCAL, stack);
-
-  emit_use (hard_frame_pointer_rtx);
-  emit_use (stack_pointer_rtx);
-  emit_indirect_jump (lab);
+
+      /* Check if number of frames <= 255 so no loop is needed.  */
+      tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255));
+      flags = gen_rtx_REG (CCmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flags, tmp));
+
+      inc_label = gen_label_rtx ();
+      tmp = gen_rtx_LEU (VOIDmode, flags, const0_rtx);
+      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+				  gen_rtx_LABEL_REF (VOIDmode, inc_label),
+				  pc_rtx);
+      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+      JUMP_LABEL (jump) = inc_label;
+
+      rtx reg_255 = gen_reg_rtx (word_mode);
+      emit_move_insn (reg_255, GEN_INT (255));
+
+      /* Adjust the ssp in a loop.  */
+      loop_label = gen_label_rtx ();
+      emit_label (loop_label);
+      LABEL_NUSES (loop_label) = 1;
+
+      emit_insn ((word_mode == SImode)
+		 ? gen_incsspsi (reg_255)
+		 : gen_incsspdi (reg_255));
+      tmp = gen_rtx_SET (reg_adj, gen_rtx_MINUS (ptr_mode,
+						 reg_adj,
+						 GEN_INT (255)));
+      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+      emit_insn (tmp);
+
+      tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255));
+      flags = gen_rtx_REG (CCmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (flags, tmp));
+
+      /* Jump to the loop label.  */
+      tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
+      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+				  gen_rtx_LABEL_REF (VOIDmode, loop_label),
+				  pc_rtx);
+      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+      JUMP_LABEL (jump) = loop_label;
+
+      emit_label (inc_label);
+      LABEL_NUSES (inc_label) = 1;
+      /* reg_adj was obtained as the ptr_mode low part of reg_ssp, so the
+	 remaining frame count is passed to INCSSP through reg_ssp.  */
+      emit_insn ((word_mode == SImode)
+		 ? gen_incsspsi (reg_ssp)
+		 : gen_incsspdi (reg_ssp));
+
+      emit_label (noadj_label);
+      LABEL_NUSES (noadj_label) = 1;
+    }
+  else
+    stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);
+  DONE;
 })
 
 
@@ -18855,6 +18304,37 @@
 		       const0_rtx);
 })
 
+;; Likewise for cmpelim optimized pattern.
+;; The matched sequence is: load memory operand 1 into register operand 0;
+;; a parallel that sets FLAGS_REG from comparing (op reg, operand 2)
+;; against zero and stores the same expression into the register; then a
+;; store of the register back to operand 1.  It is replaced by a single
+;; parallel that applies the operator to the memory operand directly and
+;; sets the flags from that expression.  The register must be dead after
+;; the third insn and must not be mentioned in operands 1 or 2.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg FLAGS_REG)
+		   (compare (match_operator:SWI 3 "plusminuslogic_operator"
+			      [(match_dup 0)
+			       (match_operand:SWI 2 "<nonmemory_operand>")])
+			    (const_int 0)))
+	      (set (match_dup 0) (match_dup 3))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && ix86_match_ccmode (peep2_next_insn (1),
+			 (GET_CODE (operands[3]) == PLUS
+			  || GET_CODE (operands[3]) == MINUS)
+			 ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 6))
+	      (set (match_dup 1) (match_dup 5))])]
+{
+  /* operands[4]: destination of the first SET in the matched arithmetic
+     insn's parallel (the flags SET in the pattern above).  */
+  operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
+  /* operands[5]: the matched operator rebuilt with the memory operand in
+     place of the register.  */
+  operands[5]
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+		      copy_rtx (operands[1]), operands[2]);
+  /* operands[6]: compare of that expression against zero, in the mode of
+     operands[4].  */
+  operands[6]
+    = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+		       const0_rtx);
+})
+
 ;; Likewise for instances where we have a lea pattern.
 (define_peephole2
   [(set (match_operand:SWI 0 "register_operand")
@@ -18898,7 +18378,7 @@
    (set (match_dup 1) (match_dup 0))
    (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
   "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
-   && GET_CODE (operands[2]) != MINUS
+   && COMMUTATIVE_ARITH_P (operands[2])
    && peep2_reg_dead_p (3, operands[0])
    && !reg_overlap_mentioned_p (operands[0], operands[1])
    && ix86_match_ccmode (peep2_next_insn (2),
@@ -18918,6 +18398,34 @@
 		       const0_rtx);
 })
 
+;; Likewise for cmpelim optimized pattern.
+;; The matched sequence is a parallel that sets FLAGS_REG from comparing
+;; (op reg, mem) against zero and stores the same expression into the
+;; register, followed by a store of the register back to the memory
+;; operand.  The replacement builds (op mem, reg), i.e. with the two
+;; operands swapped, which is why the operator is required to satisfy
+;; COMMUTATIVE_ARITH_P.  The register must be dead after the second insn.
+(define_peephole2
+  [(parallel [(set (reg FLAGS_REG)
+		   (compare (match_operator:SWI 2 "plusminuslogic_operator"
+			      [(match_operand:SWI 0 "register_operand")
+			       (match_operand:SWI 1 "memory_operand")])
+			    (const_int 0)))
+	      (set (match_dup 0) (match_dup 2))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && COMMUTATIVE_ARITH_P (operands[2])
+   && peep2_reg_dead_p (2, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (0),
+			 GET_CODE (operands[2]) == PLUS
+			 ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 3) (match_dup 5))
+	      (set (match_dup 1) (match_dup 4))])]
+{
+  /* operands[3]: destination of the first SET in the matched parallel
+     (the flags SET in the pattern above).  */
+  operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+  /* operands[4]: the operator rebuilt as (op mem, reg).  */
+  operands[4]
+    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+		      copy_rtx (operands[1]), operands[0]);
+  /* operands[5]: compare of that expression against zero, in the mode of
+     operands[3].  */
+  operands[5]
+    = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
+		       const0_rtx);
+})
+
 (define_peephole2
   [(set (match_operand:SWI12 0 "register_operand")
 	(match_operand:SWI12 1 "memory_operand"))
@@ -19836,7 +19344,6 @@
 (define_insn "fnstenv"
   [(set (match_operand:BLK 0 "memory_operand" "=m")
 	(unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
-   (clobber (reg:HI FPCR_REG))
    (clobber (reg:XF ST0_REG))
    (clobber (reg:XF ST1_REG))
    (clobber (reg:XF ST2_REG))
@@ -19855,8 +19362,6 @@
 (define_insn "fldenv"
   [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
 		    UNSPECV_FLDENV)
-   (clobber (reg:CCFP FPSR_REG))
-   (clobber (reg:HI FPCR_REG))
    (clobber (reg:XF ST0_REG))
    (clobber (reg:XF ST1_REG))
    (clobber (reg:XF ST2_REG))
@@ -20050,18 +19555,16 @@
 ;; CET instructions
+;; Read the shadow stack pointer into register operand 0.  The output
+;; template clears the destination with a 32-bit xor before RDSSP, so
+;; the pattern no longer takes the destination as an input operand.
+;; NOTE(review): presumably the xor makes operand 0 read as zero when
+;; RDSSP executes as a NOP -- confirm.
 (define_insn "rdssp<mode>"
   [(set (match_operand:SWI48x 0 "register_operand" "=r")
-	(unspec_volatile:SWI48x
-	  [(match_operand:SWI48x 1 "register_operand" "0")]
-	  UNSPECV_NOP_RDSSP))]
-  "TARGET_SHSTK"
-  "rdssp<mskmodesuffix>\t%0"
-  [(set_attr "length" "4")
+	(unspec_volatile:SWI48x [(const_int 0)] UNSPECV_NOP_RDSSP))]
+  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
+  "xor{l}\t%k0, %k0\n\trdssp<mskmodesuffix>\t%0"
+  [(set_attr "length" "6")
    (set_attr "type" "other")])
 
+;; INCSSP with its operand in register operand 0.  Enabled for
+;; TARGET_SHSTK or when flag_cf_protection has CF_RETURN set.
 (define_insn "incssp<mode>"
   [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")]
-		   UNSPECV_INCSSP)]
-  "TARGET_SHSTK"
+		    UNSPECV_INCSSP)]
+  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
   "incssp<mskmodesuffix>\t%0"
   [(set_attr "length" "4")
    (set_attr "type" "other")])
@@ -20073,9 +19576,14 @@
   [(set_attr "length" "5")
    (set_attr "type" "other")])
 
-(define_insn "rstorssp"
-  [(unspec_volatile [(match_operand 0 "memory_operand" "m")]
-		   UNSPECV_RSTORSSP)]
+;; Expander for RSTORSSP whose memory operand carries no mode in the
+;; pattern; the insn pattern "*rstorssp<mode>" takes the operand in
+;; mode P.
+(define_expand "rstorssp"
+  [(unspec_volatile [(match_operand 0 "memory_operand")]
+		    UNSPECV_RSTORSSP)]
+  "TARGET_SHSTK")
+
+(define_insn "*rstorssp<mode>"
+  [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")]
+		    UNSPECV_RSTORSSP)]
   "TARGET_SHSTK"
   "rstorssp\t%0"
   [(set_attr "length" "5")
@@ -20084,7 +19592,7 @@
 (define_insn "wrss<mode>"
   [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")
 		     (match_operand:SWI48x 1 "memory_operand" "m")]
-		   UNSPECV_WRSS)]
+		    UNSPECV_WRSS)]
   "TARGET_SHSTK"
   "wrss<mskmodesuffix>\t%0, %1"
   [(set_attr "length" "3")
@@ -20093,7 +19601,7 @@
 (define_insn "wruss<mode>"
   [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")
 		     (match_operand:SWI48x 1 "memory_operand" "m")]
-		   UNSPECV_WRUSS)]
+		    UNSPECV_WRUSS)]
   "TARGET_SHSTK"
   "wruss<mskmodesuffix>\t%0, %1"
   [(set_attr "length" "4")
@@ -20106,9 +19614,14 @@
   [(set_attr "length" "4")
    (set_attr "type" "other")])
 
-(define_insn "clrssbsy"
-  [(unspec_volatile [(match_operand 0 "memory_operand" "m")]
-		   UNSPECV_CLRSSBSY)]
+;; Expander for CLRSSBSY whose memory operand carries no mode in the
+;; pattern; the insn pattern "*clrssbsy<mode>" takes the operand in
+;; mode P.
+(define_expand "clrssbsy"
+  [(unspec_volatile [(match_operand 0 "memory_operand")]
+		    UNSPECV_CLRSSBSY)]
+  "TARGET_SHSTK")
+
+(define_insn "*clrssbsy<mode>"
+  [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")]
+		    UNSPECV_CLRSSBSY)]
   "TARGET_SHSTK"
   "clrssbsy\t%0"
   [(set_attr "length" "4")
@@ -20116,9 +19629,10 @@
 
+;; Emit "endbr64" when TARGET_64BIT and "endbr32" otherwise.  The pattern
+;; is enabled when flag_cf_protection has CF_BRANCH set.
 (define_insn "nop_endbr"
   [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
-  "TARGET_IBT"
-  "*
-{ return (TARGET_64BIT)? \"endbr64\" : \"endbr32\"; }"
+  "(flag_cf_protection & CF_BRANCH)"
+{
+  return TARGET_64BIT ? "endbr64" : "endbr32";
+}
   [(set_attr "length" "4")
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
@@ -20247,161 +19761,6 @@
   [(set_attr "length" "3")
   (set_attr "memory" "unknown")])
 
-;; MPX instructions
-
-(define_expand "<mode>_mk"
-  [(set (match_operand:BND 0 "register_operand")
-	(unspec:BND
-	  [(mem:<bnd_ptr>
-	   (match_par_dup 3
-	     [(match_operand:<bnd_ptr> 1 "register_operand")
-	      (match_operand:<bnd_ptr> 2 "address_mpx_no_base_operand")]))]
-	  UNSPEC_BNDMK))]
-  "TARGET_MPX"
-{
-  operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1],
-						  operands[2]),
-				UNSPEC_BNDMK_ADDR);
-})
-
-(define_insn "*<mode>_mk"
-  [(set (match_operand:BND 0 "register_operand" "=w")
-	(unspec:BND
-	  [(match_operator:<bnd_ptr> 3 "bnd_mem_operator"
-	     [(unspec:<bnd_ptr>
-		[(match_operand:<bnd_ptr> 1 "register_operand" "r")
-		 (match_operand:<bnd_ptr> 2 "address_mpx_no_base_operand" "Tb")]
-	        UNSPEC_BNDMK_ADDR)])]
-	  UNSPEC_BNDMK))]
-  "TARGET_MPX"
-  "bndmk\t{%3, %0|%0, %3}"
-  [(set_attr "type" "mpxmk")])
-
-(define_expand "mov<mode>"
-  [(set (match_operand:BND 0 "general_operand")
-	(match_operand:BND 1 "general_operand"))]
-  "TARGET_MPX"
-  "ix86_expand_move (<MODE>mode, operands); DONE;")
-
-(define_insn "*mov<mode>_internal_mpx"
-  [(set (match_operand:BND 0 "nonimmediate_operand" "=w,m")
-	(match_operand:BND 1 "general_operand" "wm,w"))]
-  "TARGET_MPX"
-  "bndmov\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mpxmov")])
-
-(define_expand "<mode>_<bndcheck>"
-  [(parallel
-     [(unspec
-	[(match_operand:BND 0 "register_operand")
-	 (match_operand:<bnd_ptr> 1 "address_no_seg_operand")] BNDCHECK)
-      (set (match_dup 2)
-	   (unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))])]
-  "TARGET_MPX"
-{
-  operands[2] = gen_rtx_MEM (BLKmode, operands[1]);
-  MEM_VOLATILE_P (operands[2]) = 1;
-})
-
-(define_insn "*<mode>_<bndcheck>"
-  [(unspec
-     [(match_operand:BND 0 "register_operand" "w")
-      (match_operand:<bnd_ptr> 1 "address_no_seg_operand" "Ts")] BNDCHECK)
-   (set (match_operand:BLK 2 "bnd_mem_operator")
-	(unspec:BLK [(match_dup 2)] UNSPEC_MPX_FENCE))]
-  "TARGET_MPX"
-  "bnd<bndcheck>\t{%a1, %0|%0, %a1}"
-  [(set_attr "type" "mpxchk")])
-
-(define_expand "<mode>_ldx"
-  [(parallel
-     [(set (match_operand:BND 0 "register_operand")
-	   (unspec:BND
-	     [(mem:<bnd_ptr>
-		(match_par_dup 3
-		  [(match_operand:<bnd_ptr> 1 "address_mpx_no_index_operand")
-		   (match_operand:<bnd_ptr> 2 "register_operand")]))]
-	     UNSPEC_BNDLDX))
-      (use (mem:BLK (match_dup 1)))])]
-  "TARGET_MPX"
-{
-  /* Avoid registers which cannot be used as index.  */
-  if (!index_register_operand (operands[2], Pmode))
-    operands[2] = copy_addr_to_reg (operands[2]);
-
-  operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1],
-						  operands[2]),
-				UNSPEC_BNDLDX_ADDR);
-})
-
-(define_insn "*<mode>_ldx"
-  [(set (match_operand:BND 0 "register_operand" "=w")
-	(unspec:BND
-	  [(match_operator:<bnd_ptr> 3 "bnd_mem_operator"
-	     [(unspec:<bnd_ptr>
-		[(match_operand:<bnd_ptr> 1 "address_mpx_no_index_operand" "Ti")
-		 (match_operand:<bnd_ptr> 2 "register_operand" "l")]
-		UNSPEC_BNDLDX_ADDR)])]
-	  UNSPEC_BNDLDX))
-   (use (mem:BLK (match_dup 1)))]
-  "TARGET_MPX"
-  "bndldx\t{%3, %0|%0, %3}"
-  [(set_attr "type" "mpxld")])
-
-(define_expand "<mode>_stx"
-  [(parallel
-     [(unspec
-	[(mem:<bnd_ptr>
-	   (match_par_dup 3
-	     [(match_operand:<bnd_ptr> 0 "address_mpx_no_index_operand")
-	      (match_operand:<bnd_ptr> 1 "register_operand")]))
-	 (match_operand:BND 2 "register_operand")]
-	UNSPEC_BNDSTX)
-      (set (match_dup 4)
-	   (unspec:BLK [(match_dup 4)] UNSPEC_MPX_FENCE))])]
-  "TARGET_MPX"
-{
-  /* Avoid registers which cannot be used as index.  */
-  if (!index_register_operand (operands[1], Pmode))
-    operands[1] = copy_addr_to_reg (operands[1]);
-
-  operands[3] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[0],
-						  operands[1]),
-				UNSPEC_BNDLDX_ADDR);
-  operands[4] = gen_rtx_MEM (BLKmode, operands[0]);
-  MEM_VOLATILE_P (operands[4]) = 1;
-})
-
-(define_insn "*<mode>_stx"
-  [(unspec
-     [(match_operator:<bnd_ptr> 3 "bnd_mem_operator"
-	[(unspec:<bnd_ptr>
-	   [(match_operand:<bnd_ptr> 0 "address_mpx_no_index_operand" "Ti")
-	    (match_operand:<bnd_ptr> 1 "register_operand" "l")]
-	   UNSPEC_BNDLDX_ADDR)])
-	 (match_operand:BND 2 "register_operand" "w")]
-	UNSPEC_BNDSTX)
-   (set (match_operand:BLK 4 "bnd_mem_operator")
-	(unspec:BLK [(match_dup 4)] UNSPEC_MPX_FENCE))]
-  "TARGET_MPX"
-  "bndstx\t{%2, %3|%3, %2}"
-  [(set_attr "type" "mpxst")])
-
-(define_insn "move_size_reloc_<mode>"
-  [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(unspec:SWI48
-	  [(match_operand:SWI48 1 "symbol_operand")]
-	UNSPEC_SIZEOF))]
-  "TARGET_MPX"
-{
-  if (x86_64_immediate_size_operand (operands[1], VOIDmode))
-    return "mov{l}\t{%1@SIZE, %k0|%k0, %1@SIZE}";
-  else
-    return "movabs{q}\t{%1@SIZE, %0|%0, %1@SIZE}";
-}
-  [(set_attr "type" "imov")
-   (set_attr "mode" "<MODE>")])
-
 ;; RDPKRU and WRPKRU
 
 (define_expand "rdpkru"
@@ -20447,10 +19806,112 @@
 (define_insn "rdpid"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
-  "TARGET_RDPID"
+  "!TARGET_64BIT && TARGET_RDPID"  ; SImode form now requires a non-64-bit target
+  "rdpid\t%0"
+  [(set_attr "type" "other")])
+
+(define_insn "rdpid_rex64"  ; 64-bit form of rdpid: DImode destination register
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
+  "TARGET_64BIT && TARGET_RDPID"  ; complements the !TARGET_64BIT condition on "rdpid"
   "rdpid\t%0"
   [(set_attr "type" "other")])
 
+;; Intrinsics for > i486
+
+(define_insn "wbinvd"  ; operand-less volatile pattern that emits "wbinvd"
+  [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
+  ""  ; empty condition: not gated on any target flag
+  "wbinvd"
+  [(set_attr "type" "other")])
+
+(define_insn "wbnoinvd"  ; operand-less volatile pattern that emits "wbnoinvd"
+  [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
+  "TARGET_WBNOINVD"  ; unlike "wbinvd", this one is gated on a target flag
+  "wbnoinvd"
+  [(set_attr "type" "other")])
+
+;; MOVDIRI and MOVDIR64B
+
+(define_insn "movdiri<mode>"  ; one pattern per mode of the SWI48 iterator
+  [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m")
+			   (match_operand:SWI48 1 "register_operand" "r")]
+			  UNSPECV_MOVDIRI)]
+  "TARGET_MOVDIRI"
+  "movdiri\t{%1, %0|%0, %1}"  ; AT&T order: register %1, memory %0; Intel order reversed
+  [(set_attr "type" "other")])
+
+(define_insn "movdir64b_<mode>"  ; <mode> comes from P, the mode of register operand 0
+  [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
+			(match_operand:XI 1 "memory_operand" "m")]
+		       UNSPECV_MOVDIR64B)]
+  "TARGET_MOVDIR64B"
+  "movdir64b\t{%1, %0|%0, %1}"  ; AT&T order: memory %1, register %0; Intel order reversed
+  [(set_attr "type" "other")])
+
+;; WAITPKG
+
+(define_insn "umwait"  ; 32-bit form: operand 1 is a single DImode value under constraint "A"
+  [(set (reg:CCC FLAGS_REG)  ; the pattern's only set is the flags register, in CCC mode
+	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+			      (match_operand:DI 1 "register_operand" "A")]
+			     UNSPECV_UMWAIT))]
+  "!TARGET_64BIT && TARGET_WAITPKG"
+  "umwait\t%0"  ; only operand 0 is printed; operand 1 is fixed by its constraint
+  [(set_attr "length" "3")])
+
+(define_insn "umwait_rex64"  ; 64-bit form: two SImode operands under "a" and "d" replace the DImode one
+  [(set (reg:CCC FLAGS_REG)  ; the pattern's only set is the flags register, in CCC mode
+	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+			      (match_operand:SI 1 "register_operand" "a")
+			      (match_operand:SI 2 "register_operand" "d")]
+			     UNSPECV_UMWAIT))]
+  "TARGET_64BIT && TARGET_WAITPKG"
+  "umwait\t%0"  ; only operand 0 is printed; operands 1 and 2 are fixed by their constraints
+  [(set_attr "length" "3")])
+
+(define_insn "umonitor_<mode>"  ; one pattern per mode of iterator P
+  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+		    UNSPECV_UMONITOR)]
+  "TARGET_WAITPKG"
+  "umonitor\t%0"
+  [(set (attr "length")  ; 3, plus 1 when Pmode differs from word_mode (presumably a prefix byte)
+     (symbol_ref ("(Pmode != word_mode) + 3")))])
+
+(define_insn "tpause"  ; 32-bit form: operand 1 is a single DImode value under constraint "A"
+  [(set (reg:CCC FLAGS_REG)  ; the pattern's only set is the flags register, in CCC mode
+	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+			      (match_operand:DI 1 "register_operand" "A")]
+			     UNSPECV_TPAUSE))]
+  "!TARGET_64BIT && TARGET_WAITPKG"
+  "tpause\t%0"  ; only operand 0 is printed; operand 1 is fixed by its constraint
+  [(set_attr "length" "3")])
+
+(define_insn "tpause_rex64"  ; 64-bit form: two SImode operands under "a" and "d" replace the DImode one
+  [(set (reg:CCC FLAGS_REG)  ; the pattern's only set is the flags register, in CCC mode
+	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+			      (match_operand:SI 1 "register_operand" "a")
+			      (match_operand:SI 2 "register_operand" "d")]
+			     UNSPECV_TPAUSE))]
+  "TARGET_64BIT && TARGET_WAITPKG"
+  "tpause\t%0"  ; only operand 0 is printed; operands 1 and 2 are fixed by their constraints
+  [(set_attr "length" "3")])
+
+(define_insn "cldemote"  ; operand 0 is an address (address_operand), not a MEM
+  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+		    UNSPECV_CLDEMOTE)]
+  "TARGET_CLDEMOTE"
+  "cldemote\t%a0"  ; %a0 prints operand 0 as an address
+  [(set_attr "type" "other")
+   (set_attr "memory" "unknown")])
+
+(define_insn "speculation_barrier"  ; operand-less volatile pattern that emits "lfence"
+  [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
+  ""  ; empty condition: not gated on any target flag
+  "lfence"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")