diff gcc/config/i386/i386.md @ 69:1b10fe6932e1

merge 69
author Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 21 Aug 2011 07:53:12 +0900
parents 326d9e06c2e3 f6334be47118
children ab0bcb71f44d
line wrap: on
line diff
--- a/gcc/config/i386/i386.md	Tue Dec 14 03:58:33 2010 +0900
+++ b/gcc/config/i386/i386.md	Sun Aug 21 07:53:12 2011 +0900
@@ -1,6 +1,6 @@
 ;; GCC machine description for IA-32 and x86-64.
 ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
@@ -30,7 +30,6 @@
 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
 ;; C -- print opcode suffix for set/cmov insn.
 ;; c -- like C, but print reversed condition
-;; E,e -- likewise, but for compare-and-branch fused insn.
 ;; F,f -- likewise, but for floating-point.
 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
 ;;      otherwise nothing
@@ -60,199 +59,218 @@
 ;; Y -- print condition for XOP pcom* instruction.
 ;; + -- print a branch hint as 'cs' or 'ds' prefix
 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
+;; @ -- print a segment register of thread base pointer load
 
 ;; UNSPEC usage:
 
-(define_constants
-  [; Relocation specifiers
-   (UNSPEC_GOT			0)
-   (UNSPEC_GOTOFF		1)
-   (UNSPEC_GOTPCREL		2)
-   (UNSPEC_GOTTPOFF		3)
-   (UNSPEC_TPOFF		4)
-   (UNSPEC_NTPOFF		5)
-   (UNSPEC_DTPOFF		6)
-   (UNSPEC_GOTNTPOFF		7)
-   (UNSPEC_INDNTPOFF		8)
-   (UNSPEC_PLTOFF		9)
-   (UNSPEC_MACHOPIC_OFFSET	10)
-
-   ; Prologue support
-   (UNSPEC_STACK_ALLOC		11)
-   (UNSPEC_SET_GOT		12)
-   (UNSPEC_SSE_PROLOGUE_SAVE	13)
-   (UNSPEC_REG_SAVE		14)
-   (UNSPEC_DEF_CFA		15)
-   (UNSPEC_SET_RIP		16)
-   (UNSPEC_SET_GOT_OFFSET	17)
-   (UNSPEC_MEMORY_BLOCKAGE	18)
-
-   ; TLS support
-   (UNSPEC_TP			20)
-   (UNSPEC_TLS_GD		21)
-   (UNSPEC_TLS_LD_BASE		22)
-   (UNSPEC_TLSDESC		23)
-
-   ; Other random patterns
-   (UNSPEC_SCAS			30)
-   (UNSPEC_FNSTSW		31)
-   (UNSPEC_SAHF			32)
-   (UNSPEC_FSTCW		33)
-   (UNSPEC_ADD_CARRY		34)
-   (UNSPEC_FLDCW		35)
-   (UNSPEC_REP			36)
-   (UNSPEC_LD_MPIC		38)	; load_macho_picbase
-   (UNSPEC_TRUNC_NOOP		39)
-
-   ; For SSE/MMX support:
-   (UNSPEC_FIX_NOTRUNC		40)
-   (UNSPEC_MASKMOV		41)
-   (UNSPEC_MOVMSK		42)
-   (UNSPEC_MOVNT		43)
-   (UNSPEC_MOVU			44)
-   (UNSPEC_RCP			45)
-   (UNSPEC_RSQRT		46)
-   (UNSPEC_SFENCE		47)
-   (UNSPEC_PFRCP		49)
-   (UNSPEC_PFRCPIT1		40)
-   (UNSPEC_PFRCPIT2		41)
-   (UNSPEC_PFRSQRT		42)
-   (UNSPEC_PFRSQIT1		43)
-   (UNSPEC_MFENCE		44)
-   (UNSPEC_LFENCE		45)
-   (UNSPEC_PSADBW		46)
-   (UNSPEC_LDDQU		47)
-   (UNSPEC_MS_TO_SYSV_CALL	48)
-
-   ; Generic math support
-   (UNSPEC_COPYSIGN		50)
-   (UNSPEC_IEEE_MIN		51)	; not commutative
-   (UNSPEC_IEEE_MAX		52)	; not commutative
-
-   ; x87 Floating point
-   (UNSPEC_SIN			60)
-   (UNSPEC_COS			61)
-   (UNSPEC_FPATAN		62)
-   (UNSPEC_FYL2X		63)
-   (UNSPEC_FYL2XP1		64)
-   (UNSPEC_FRNDINT		65)
-   (UNSPEC_FIST			66)
-   (UNSPEC_F2XM1		67)
-   (UNSPEC_TAN			68)
-   (UNSPEC_FXAM			69)
-
-   ; x87 Rounding
-   (UNSPEC_FRNDINT_FLOOR	70)
-   (UNSPEC_FRNDINT_CEIL 	71)
-   (UNSPEC_FRNDINT_TRUNC	72)
-   (UNSPEC_FRNDINT_MASK_PM	73)
-   (UNSPEC_FIST_FLOOR		74)
-   (UNSPEC_FIST_CEIL 		75)
-
-   ; x87 Double output FP
-   (UNSPEC_SINCOS_COS		80)
-   (UNSPEC_SINCOS_SIN		81)
-   (UNSPEC_XTRACT_FRACT		84)
-   (UNSPEC_XTRACT_EXP		85)
-   (UNSPEC_FSCALE_FRACT		86)
-   (UNSPEC_FSCALE_EXP		87)
-   (UNSPEC_FPREM_F		88)
-   (UNSPEC_FPREM_U		89)
-   (UNSPEC_FPREM1_F		90)
-   (UNSPEC_FPREM1_U		91)
-
-   (UNSPEC_C2_FLAG		95)
-   (UNSPEC_FXAM_MEM		96)
-
-   ; SSP patterns
-   (UNSPEC_SP_SET		100)
-   (UNSPEC_SP_TEST		101)
-   (UNSPEC_SP_TLS_SET		102)
-   (UNSPEC_SP_TLS_TEST		103)
-
-   ; SSSE3
-   (UNSPEC_PSHUFB		120)
-   (UNSPEC_PSIGN		121)
-   (UNSPEC_PALIGNR		122)
-
-   ; For SSE4A support
-   (UNSPEC_EXTRQI               130)
-   (UNSPEC_EXTRQ                131)
-   (UNSPEC_INSERTQI             132)
-   (UNSPEC_INSERTQ              133)
-
-   ; For SSE4.1 support
-   (UNSPEC_BLENDV		134)
-   (UNSPEC_INSERTPS		135)
-   (UNSPEC_DP			136)
-   (UNSPEC_MOVNTDQA		137)
-   (UNSPEC_MPSADBW		138)
-   (UNSPEC_PHMINPOSUW		139)
-   (UNSPEC_PTEST		140)
-   (UNSPEC_ROUND		141)
-
-   ; For SSE4.2 support
-   (UNSPEC_CRC32		143)
-   (UNSPEC_PCMPESTR		144)
-   (UNSPEC_PCMPISTR		145)
-
-   ; For FMA4 support
-   (UNSPEC_FMA4_INTRINSIC	150)
-   (UNSPEC_FMA4_FMADDSUB	151)
-   (UNSPEC_FMA4_FMSUBADD	152)
-   (UNSPEC_XOP_UNSIGNED_CMP	151)
-   (UNSPEC_XOP_TRUEFALSE	152)
-   (UNSPEC_XOP_PERMUTE		153)
-   (UNSPEC_FRCZ			154)
-
-   ; For AES support
-   (UNSPEC_AESENC		159)
-   (UNSPEC_AESENCLAST		160)
-   (UNSPEC_AESDEC		161)
-   (UNSPEC_AESDECLAST		162)
-   (UNSPEC_AESIMC		163)
-   (UNSPEC_AESKEYGENASSIST	164)
-
-   ; For PCLMUL support
-   (UNSPEC_PCLMUL		165)
-
-   ; For AVX support
-   (UNSPEC_PCMP			166)
-   (UNSPEC_VPERMIL		167)
-   (UNSPEC_VPERMIL2F128		168)
-   (UNSPEC_MASKLOAD		169)
-   (UNSPEC_MASKSTORE		170)
-   (UNSPEC_CAST			171)
-   (UNSPEC_VTESTP		172)
-  ])
-
-(define_constants
-  [(UNSPECV_BLOCKAGE		0)
-   (UNSPECV_STACK_PROBE		1)
-   (UNSPECV_EMMS		2)
-   (UNSPECV_LDMXCSR		3)
-   (UNSPECV_STMXCSR		4)
-   (UNSPECV_FEMMS		5)
-   (UNSPECV_CLFLUSH		6)
-   (UNSPECV_ALIGN		7)
-   (UNSPECV_MONITOR		8)
-   (UNSPECV_MWAIT		9)
-   (UNSPECV_CMPXCHG		10)
-   (UNSPECV_XCHG		12)
-   (UNSPECV_LOCK		13)
-   (UNSPECV_PROLOGUE_USE	14)
-   (UNSPECV_CLD			15)
-   (UNSPECV_VZEROALL		16)
-   (UNSPECV_VZEROUPPER		17)
-   (UNSPECV_RDTSC		18)
-   (UNSPECV_RDTSCP		19)
-   (UNSPECV_RDPMC		20)
-   (UNSPECV_VSWAPMOV		21)
-   (UNSPECV_LLWP_INTRINSIC	22)
-   (UNSPECV_SLWP_INTRINSIC	23)
-   (UNSPECV_LWPVAL_INTRINSIC	24)
-   (UNSPECV_LWPINS_INTRINSIC	25)
-  ])
+(define_c_enum "unspec" [
+  ;; Relocation specifiers
+  UNSPEC_GOT
+  UNSPEC_GOTOFF
+  UNSPEC_GOTPCREL
+  UNSPEC_GOTTPOFF
+  UNSPEC_TPOFF
+  UNSPEC_NTPOFF
+  UNSPEC_DTPOFF
+  UNSPEC_GOTNTPOFF
+  UNSPEC_INDNTPOFF
+  UNSPEC_PLTOFF
+  UNSPEC_MACHOPIC_OFFSET
+  UNSPEC_PCREL
+
+  ;; Prologue support
+  UNSPEC_STACK_ALLOC
+  UNSPEC_SET_GOT
+  UNSPEC_REG_SAVE
+  UNSPEC_DEF_CFA
+  UNSPEC_SET_RIP
+  UNSPEC_SET_GOT_OFFSET
+  UNSPEC_MEMORY_BLOCKAGE
+  UNSPEC_STACK_CHECK
+
+  ;; TLS support
+  UNSPEC_TP
+  UNSPEC_TLS_GD
+  UNSPEC_TLS_LD_BASE
+  UNSPEC_TLSDESC
+  UNSPEC_TLS_IE_SUN
+
+  ;; Other random patterns
+  UNSPEC_SCAS
+  UNSPEC_FNSTSW
+  UNSPEC_SAHF
+  UNSPEC_PARITY
+  UNSPEC_FSTCW
+  UNSPEC_ADD_CARRY
+  UNSPEC_FLDCW
+  UNSPEC_REP
+  UNSPEC_LD_MPIC	; load_macho_picbase
+  UNSPEC_TRUNC_NOOP
+  UNSPEC_DIV_ALREADY_SPLIT
+  UNSPEC_CALL_NEEDS_VZEROUPPER
+
+  ;; For SSE/MMX support:
+  UNSPEC_FIX_NOTRUNC
+  UNSPEC_MASKMOV
+  UNSPEC_MOVMSK
+  UNSPEC_MOVNT
+  UNSPEC_MOVU
+  UNSPEC_RCP
+  UNSPEC_RSQRT
+  UNSPEC_SFENCE
+  UNSPEC_PFRCP
+  UNSPEC_PFRCPIT1
+  UNSPEC_PFRCPIT2
+  UNSPEC_PFRSQRT
+  UNSPEC_PFRSQIT1
+  UNSPEC_MFENCE
+  UNSPEC_LFENCE
+  UNSPEC_PSADBW
+  UNSPEC_LDDQU
+  UNSPEC_MS_TO_SYSV_CALL
+
+  ;; Generic math support
+  UNSPEC_COPYSIGN
+  UNSPEC_IEEE_MIN	; not commutative
+  UNSPEC_IEEE_MAX	; not commutative
+
+  ;; x87 Floating point
+  UNSPEC_SIN
+  UNSPEC_COS
+  UNSPEC_FPATAN
+  UNSPEC_FYL2X
+  UNSPEC_FYL2XP1
+  UNSPEC_FRNDINT
+  UNSPEC_FIST
+  UNSPEC_F2XM1
+  UNSPEC_TAN
+  UNSPEC_FXAM
+
+  ;; x87 Rounding
+  UNSPEC_FRNDINT_FLOOR
+  UNSPEC_FRNDINT_CEIL
+  UNSPEC_FRNDINT_TRUNC
+  UNSPEC_FRNDINT_MASK_PM
+  UNSPEC_FIST_FLOOR
+  UNSPEC_FIST_CEIL
+
+  ;; x87 Double output FP
+  UNSPEC_SINCOS_COS
+  UNSPEC_SINCOS_SIN
+  UNSPEC_XTRACT_FRACT
+  UNSPEC_XTRACT_EXP
+  UNSPEC_FSCALE_FRACT
+  UNSPEC_FSCALE_EXP
+  UNSPEC_FPREM_F
+  UNSPEC_FPREM_U
+  UNSPEC_FPREM1_F
+  UNSPEC_FPREM1_U
+
+  UNSPEC_C2_FLAG
+  UNSPEC_FXAM_MEM
+
+  ;; SSP patterns
+  UNSPEC_SP_SET
+  UNSPEC_SP_TEST
+  UNSPEC_SP_TLS_SET
+  UNSPEC_SP_TLS_TEST
+
+  ;; SSSE3
+  UNSPEC_PSHUFB
+  UNSPEC_PSIGN
+  UNSPEC_PALIGNR
+
+  ;; For SSE4A support
+  UNSPEC_EXTRQI
+  UNSPEC_EXTRQ
+  UNSPEC_INSERTQI
+  UNSPEC_INSERTQ
+
+  ;; For SSE4.1 support
+  UNSPEC_BLENDV
+  UNSPEC_INSERTPS
+  UNSPEC_DP
+  UNSPEC_MOVNTDQA
+  UNSPEC_MPSADBW
+  UNSPEC_PHMINPOSUW
+  UNSPEC_PTEST
+  UNSPEC_ROUND
+
+  ;; For SSE4.2 support
+  UNSPEC_CRC32
+  UNSPEC_PCMPESTR
+  UNSPEC_PCMPISTR
+
+  ;; For FMA4 support
+  UNSPEC_FMADDSUB
+  UNSPEC_XOP_UNSIGNED_CMP
+  UNSPEC_XOP_TRUEFALSE
+  UNSPEC_XOP_PERMUTE
+  UNSPEC_FRCZ
+
+  ;; For AES support
+  UNSPEC_AESENC
+  UNSPEC_AESENCLAST
+  UNSPEC_AESDEC
+  UNSPEC_AESDECLAST
+  UNSPEC_AESIMC
+  UNSPEC_AESKEYGENASSIST
+
+  ;; For PCLMUL support
+  UNSPEC_PCLMUL
+
+  ;; For AVX support
+  UNSPEC_PCMP
+  UNSPEC_VPERMIL
+  UNSPEC_VPERMIL2
+  UNSPEC_VPERMIL2F128
+  UNSPEC_MASKLOAD
+  UNSPEC_MASKSTORE
+  UNSPEC_CAST
+  UNSPEC_VTESTP
+  UNSPEC_VCVTPH2PS
+  UNSPEC_VCVTPS2PH
+
+  ;; For BMI support
+  UNSPEC_BEXTR
+
+  ;; For RDRAND support
+  UNSPEC_RDRAND
+])
+
+(define_c_enum "unspecv" [
+  UNSPECV_BLOCKAGE
+  UNSPECV_STACK_PROBE
+  UNSPECV_PROBE_STACK_RANGE
+  UNSPECV_EMMS
+  UNSPECV_LDMXCSR
+  UNSPECV_STMXCSR
+  UNSPECV_FEMMS
+  UNSPECV_CLFLUSH
+  UNSPECV_ALIGN
+  UNSPECV_MONITOR
+  UNSPECV_MWAIT
+  UNSPECV_CMPXCHG
+  UNSPECV_XCHG
+  UNSPECV_LOCK
+  UNSPECV_PROLOGUE_USE
+  UNSPECV_CLD
+  UNSPECV_NOPS
+  UNSPECV_VZEROALL
+  UNSPECV_VZEROUPPER
+  UNSPECV_RDTSC
+  UNSPECV_RDTSCP
+  UNSPECV_RDPMC
+  UNSPECV_LLWP_INTRINSIC
+  UNSPECV_SLWP_INTRINSIC
+  UNSPECV_LWPVAL_INTRINSIC
+  UNSPECV_LWPINS_INTRINSIC
+  UNSPECV_RDFSBASE
+  UNSPECV_RDGSBASE
+  UNSPECV_WRFSBASE
+  UNSPECV_WRGSBASE
+  UNSPECV_SPLIT_STACK_RETURN
+])
 
 ;; Constants to represent pcomtrue/pcomfalse variants
 (define_constants
@@ -341,8 +359,8 @@
 
 
 ;; Processor type.
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
-		    generic64,amdfam10"
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7,
+		    atom,generic64,amdfam10,bdver1,btver1"
   (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
@@ -355,7 +373,7 @@
    push,pop,call,callv,leave,
    str,bitmanip,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
-   sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+   sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
    sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
    ssemuladd,sse4arg,lwp,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
@@ -370,7 +388,7 @@
 (define_attr "unit" "integer,i387,sse,mmx,unknown"
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
 	   (const_string "i387")
-	 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+	 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
 			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
 			  ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
 	   (const_string "sse")
@@ -451,7 +469,7 @@
 
 ;; Set when REX opcode prefix is used.
 (define_attr "prefix_rex" ""
-  (cond [(ne (symbol_ref "!TARGET_64BIT") (const_int 0))
+  (cond [(eq (symbol_ref "TARGET_64BIT") (const_int 0))
 	   (const_int 0)
 	 (and (eq_attr "mode" "DI")
 	      (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
@@ -676,13 +694,6 @@
   [(set_attr "length" "128")
    (set_attr "type" "multi")])
 
-;; All integer comparison codes.
-(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu])
-
-;; All floating-point comparison codes.
-(define_code_iterator fp_cond [unordered ordered
-			       uneq unge ungt unle unlt ltgt])
-
 (define_code_iterator plusminus [plus minus])
 
 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
@@ -709,26 +720,41 @@
 ;; Mapping of unsigned max and min
 (define_code_iterator umaxmin [umax umin])
 
-;; Mapping of signed/unsigned max and min
-(define_code_iterator maxmin [smax smin umax umin])
-
 ;; Base name for integer and FP insn mnemonic
-(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
-				 (umax "maxu") (umin "minu")])
-(define_code_attr maxminfprefix [(smax "max") (smin "min")])
+(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
+			      (umax "maxu") (umin "minu")])
+(define_code_attr maxmin_float [(smax "max") (smin "min")])
 
 ;; Mapping of logic operators
 (define_code_iterator any_logic [and ior xor])
 (define_code_iterator any_or [ior xor])
 
 ;; Base name for insn mnemonic.
-(define_code_attr logicprefix [(and "and") (ior "or") (xor "xor")])
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of shift-right operators
+(define_code_iterator any_shiftrt [lshiftrt ashiftrt])
+
+;; Base name for define_insn
+(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")])
+
+;; Mapping of rotate operators
+(define_code_iterator any_rotate [rotate rotatert])
+
+;; Base name for define_insn
+(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
 
 ;; Mapping of abs neg operators
 (define_code_iterator absneg [abs neg])
 
 ;; Base name for x87 insn mnemonic.
-(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
+(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
 
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
@@ -745,12 +771,24 @@
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
 			     (div "i") (udiv "")])
 
-;; All single word integer modes.
+;; 64bit single word integer modes.
+(define_mode_iterator SWI1248x [QI HI SI DI])
+
+;; 64bit single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48x [SI DI])
+
+;; Single word integer modes.
 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
 
+;; Single word integer modes without SImode and DImode.
+(define_mode_iterator SWI12 [QI HI])
+
 ;; Single word integer modes without DImode.
 (define_mode_iterator SWI124 [QI HI SI])
 
+;; Single word integer modes without QImode and DImode.
+(define_mode_iterator SWI24 [HI SI])
+
 ;; Single word integer modes without QImode.
 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
 
@@ -767,21 +805,36 @@
 			    (HI "TARGET_HIMODE_MATH")
 			    SI (DI "TARGET_64BIT")])
 
+;; Math-dependant single word integer modes without DImode.
+(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
+			       (HI "TARGET_HIMODE_MATH")
+			       SI])
+
 ;; Math-dependant single word integer modes without QImode.
 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
 		      	       SI (DI "TARGET_64BIT")])
 
+;; Double word integer modes.
+(define_mode_iterator DWI [(DI "!TARGET_64BIT")
+			   (TI "TARGET_64BIT")])
+
+;; Double word integer modes as mode attribute.
+(define_mode_attr DWI [(SI "DI") (DI "TI")])
+(define_mode_attr dwi [(SI "di") (DI "ti")])
+
 ;; Half mode for double word integer modes.
 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
 			    (DI "TARGET_64BIT")])
 
-;; Double word integer modes.
-(define_mode_attr DWI [(SI "DI") (DI "TI")])
-(define_mode_attr dwi [(SI "di") (DI "ti")])
-
 ;; Instruction suffix for integer modes.
 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
 
+;; Pointer size prefix for integer modes (Intel asm dialect)
+(define_mode_attr iptrsize [(QI "BYTE")
+			    (HI "WORD")
+			    (SI "DWORD")
+			    (DI "QWORD")])
+
 ;; Register class for integer modes.
 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
 
@@ -794,6 +847,9 @@
 ;; Immediate operand constraint for double integer modes.
 (define_mode_attr di [(SI "iF") (DI "e")])
 
+;; Immediate operand constraint for shifts.
+(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+
 ;; General operand predicate for integer modes.
 (define_mode_attr general_operand
 	[(QI "general_operand")
@@ -809,6 +865,43 @@
 	 (SI "general_operand")
 	 (DI "x86_64_szext_general_operand")])
 
+;; Immediate operand predicate for integer modes.
+(define_mode_attr immediate_operand
+	[(QI "immediate_operand")
+	 (HI "immediate_operand")
+	 (SI "immediate_operand")
+	 (DI "x86_64_immediate_operand")])
+
+;; Nonmemory operand predicate for integer modes.
+(define_mode_attr nonmemory_operand
+	[(QI "nonmemory_operand")
+	 (HI "nonmemory_operand")
+	 (SI "nonmemory_operand")
+	 (DI "x86_64_nonmemory_operand")])
+
+;; Operand predicate for shifts.
+(define_mode_attr shift_operand
+	[(QI "nonimmediate_operand")
+	 (HI "nonimmediate_operand")
+	 (SI "nonimmediate_operand")
+	 (DI "shiftdi_operand")
+	 (TI "register_operand")])
+
+;; Operand predicate for shift argument.
+(define_mode_attr shift_immediate_operand
+	[(QI "const_1_to_31_operand")
+	 (HI "const_1_to_31_operand")
+	 (SI "const_1_to_31_operand")
+	 (DI "const_1_to_63_operand")])
+
+;; Input operand predicate for arithmetic left shifts.
+(define_mode_attr ashl_input_operand
+	[(QI "nonimmediate_operand")
+	 (HI "nonimmediate_operand")
+	 (SI "nonimmediate_operand")
+	 (DI "ashldi_input_operand")
+	 (TI "reg_or_pm1_operand")])
+
 ;; SSE and x87 SFmode and DFmode floating point modes
 (define_mode_iterator MODEF [SF DF])
 
@@ -844,8 +937,10 @@
 (include "ppro.md")
 (include "k6.md")
 (include "athlon.md")
+(include "bdver1.md")
 (include "geode.md")
 (include "atom.md")
+(include "core2.md")
 
 
 ;; Operand and operator predicates and constraints
@@ -861,7 +956,7 @@
 	(compare:CC (match_operand:SDWIM 1 "nonimmediate_operand" "")
 		    (match_operand:SDWIM 2 "<general_operand>" "")))
    (set (pc) (if_then_else
-	       (match_operator 0 "comparison_operator"
+	       (match_operator 0 "ordered_comparison_operator"
 		[(reg:CC FLAGS_REG) (const_int 0)])
 	       (label_ref (match_operand 3 "" ""))
 	       (pc)))]
@@ -869,9 +964,8 @@
 {
   if (MEM_P (operands[1]) && MEM_P (operands[2]))
     operands[1] = force_reg (<MODE>mode, operands[1]);
-  ix86_compare_op0 = operands[1];
-  ix86_compare_op1 = operands[2];
-  ix86_expand_branch (GET_CODE (operands[0]), operands[3]);
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[1], operands[2], operands[3]);
   DONE;
 })
 
@@ -880,24 +974,21 @@
 	(compare:CC (match_operand:SWIM 2 "nonimmediate_operand" "")
 		    (match_operand:SWIM 3 "<general_operand>" "")))
    (set (match_operand:QI 0 "register_operand" "")
-	(match_operator 1 "comparison_operator"
+	(match_operator 1 "ordered_comparison_operator"
 	  [(reg:CC FLAGS_REG) (const_int 0)]))]
   ""
 {
   if (MEM_P (operands[2]) && MEM_P (operands[3]))
     operands[2] = force_reg (<MODE>mode, operands[2]);
-  ix86_compare_op0 = operands[2];
-  ix86_compare_op1 = operands[3];
-  ix86_expand_setcc (GET_CODE (operands[1]), operands[0]);
+  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+		     operands[2], operands[3]);
   DONE;
 })
 
 (define_expand "cmp<mode>_1"
   [(set (reg:CC FLAGS_REG)
 	(compare:CC (match_operand:SWI48 0 "nonimmediate_operand" "")
-		    (match_operand:SWI48 1 "<general_operand>" "")))]
-  ""
-  "")
+		    (match_operand:SWI48 1 "<general_operand>" "")))])
 
 (define_insn "*cmp<mode>_ccno_1"
   [(set (reg FLAGS_REG)
@@ -982,9 +1073,7 @@
 	      (match_operand 0 "ext_register_operand" "")
 	      (const_int 8)
 	      (const_int 8)) 0)
-	  (match_operand:QI 1 "immediate_operand" "")))]
-  ""
-  "")
+	  (match_operand:QI 1 "immediate_operand" "")))])
 
 (define_insn "*cmpqi_ext_3_insn"
   [(set (reg FLAGS_REG)
@@ -1051,9 +1140,8 @@
               (pc)))]
   "TARGET_80387"
 {
-  ix86_compare_op0 = operands[1];
-  ix86_compare_op1 = operands[2];
-  ix86_expand_branch (GET_CODE (operands[0]), operands[3]);
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[1], operands[2], operands[3]);
   DONE;
 })
 
@@ -1067,9 +1155,8 @@
                 (const_int 0)]))]
   "TARGET_80387"
 {
-  ix86_compare_op0 = operands[2];
-  ix86_compare_op1 = operands[3];
-  ix86_expand_setcc (GET_CODE (operands[1]), operands[0]);
+  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+		     operands[2], operands[3]);
   DONE;
 })
 
@@ -1085,9 +1172,8 @@
               (pc)))]
   "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
-  ix86_compare_op0 = operands[1];
-  ix86_compare_op1 = operands[2];
-  ix86_expand_branch (GET_CODE (operands[0]), operands[3]);
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[1], operands[2], operands[3]);
   DONE;
 })
 
@@ -1101,9 +1187,8 @@
                 (const_int 0)]))]
   "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
-  ix86_compare_op0 = operands[2];
-  ix86_compare_op1 = operands[3];
-  ix86_expand_setcc (GET_CODE (operands[1]), operands[0]);
+  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+		     operands[2], operands[3]);
   DONE;
 })
 
@@ -1116,9 +1201,8 @@
               (pc)))]
   ""
 {
-  ix86_compare_op0 = operands[1];
-  ix86_compare_op1 = operands[2];
-  ix86_expand_branch (GET_CODE (operands[0]), operands[3]);
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[1], operands[2], operands[3]);
   DONE;
 })
 
@@ -1129,9 +1213,8 @@
                 (match_operand 3 "const0_operand" "")]))]
   ""
 {
-  ix86_compare_op0 = operands[2];
-  ix86_compare_op1 = operands[3];
-  ix86_expand_setcc (GET_CODE (operands[1]), operands[0]);
+  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+		     operands[2], operands[3]);
   DONE;
 })
 
@@ -1374,15 +1457,17 @@
 		   UNSPEC_SAHF))]
   "TARGET_SAHF"
 {
-#ifdef HAVE_AS_IX86_SAHF
+#ifndef HAVE_AS_IX86_SAHF
+  if (TARGET_64BIT)
+    return ASM_BYTE "0x9e";
+  else
+#endif
   return "sahf";
-#else
-  return ASM_BYTE "0x9e";
-#endif
 }
   [(set_attr "length" "1")
    (set_attr "athlon_decode" "vector")
    (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
 ;; Pentium Pro can do steps 1 through 3 in one go.
@@ -1413,7 +1498,8 @@
 	      ]
 	      (const_string "0")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")])
 
 (define_insn "*cmpfp_i_sse"
   [(set (reg:CCFP FLAGS_REG)
@@ -1435,7 +1521,8 @@
 		      (const_string "1")
 		      (const_string "0")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")])
 
 (define_insn "*cmpfp_i_i387"
   [(set (reg:CCFP FLAGS_REG)
@@ -1455,7 +1542,8 @@
 	   ]
 	   (const_string "XF")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")])
 
 (define_insn "*cmpfp_iu_mixed"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1483,7 +1571,8 @@
 	      ]
 	      (const_string "0")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")])
 
 (define_insn "*cmpfp_iu_sse"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1505,7 +1594,8 @@
 		      (const_string "1")
 		      (const_string "0")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")])
 
 (define_insn "*cmpfp_iu_387"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1525,773 +1615,25 @@
 	   ]
 	   (const_string "XF")))
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")])
 
-;; Move instructions.
-
-;; General case of fullword move.
-
-(define_expand "movsi"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(match_operand:SI 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (SImode, operands); DONE;")
-
-;; Push/pop instructions.  They are separate since autoinc/dec is not a
-;; general_operand.
-;;
-;; %%% We don't use a post-inc memory reference because x86 is not a
-;; general AUTO_INC_DEC host, which impacts how it is treated in flow.
-;; Changing this impacts compiler performance on other non-AUTO_INC_DEC
-;; targets without our curiosities, and it is just as easy to represent
-;; this differently.
-
-(define_insn "*pushsi2"
-  [(set (match_operand:SI 0 "push_operand" "=<")
-	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
-  "!TARGET_64BIT"
-  "push{l}\t%1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "SI")])
-
-;; For 64BIT abi we always round up to 8 bytes.
-(define_insn "*pushsi2_rex64"
-  [(set (match_operand:SI 0 "push_operand" "=X")
-	(match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))]
-  "TARGET_64BIT"
-  "push{q}\t%q1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "SI")])
-
-(define_insn "*pushsi2_prologue"
-  [(set (match_operand:SI 0 "push_operand" "=<")
-	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))
-   (clobber (mem:BLK (scratch)))]
-  "!TARGET_64BIT"
-  "push{l}\t%1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "SI")])
-
-(define_insn "*popsi1_epilogue"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
-	(mem:SI (reg:SI SP_REG)))
-   (set (reg:SI SP_REG)
-	(plus:SI (reg:SI SP_REG) (const_int 4)))
-   (clobber (mem:BLK (scratch)))]
-  "!TARGET_64BIT"
-  "pop{l}\t%0"
-  [(set_attr "type" "pop")
-   (set_attr "mode" "SI")])
-
-(define_insn "popsi1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
-	(mem:SI (reg:SI SP_REG)))
-   (set (reg:SI SP_REG)
-	(plus:SI (reg:SI SP_REG) (const_int 4)))]
-  "!TARGET_64BIT"
-  "pop{l}\t%0"
-  [(set_attr "type" "pop")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movsi_xor"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(match_operand:SI 1 "const0_operand" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  "xor{l}\t%0, %0"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "SI")
-   (set_attr "length_immediate" "0")])
-
-(define_insn "*movsi_or"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(match_operand:SI 1 "immediate_operand" "i"))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && operands[1] == constm1_rtx"
-{
-  operands[1] = constm1_rtx;
-  return "or{l}\t{%1, %0|%0, %1}";
-}
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "SI")
-   (set_attr "length_immediate" "1")])
-
-(define_insn "*movsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand"
-			"=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
-	(match_operand:SI 1 "general_operand"
-			"g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r   ,m "))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_SSELOG1:
-      if (get_attr_mode (insn) == MODE_TI)
-        return "%vpxor\t%0, %d0";
-      return "%vxorps\t%0, %d0";
-
-    case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_SI:
-          return "%vmovd\t{%1, %0|%0, %1}";
-	case MODE_SF:
-          return "%vmovss\t{%1, %0|%0, %1}";
-	default:
-	  gcc_unreachable ();
-	}
-
-    case TYPE_MMX:
-      return "pxor\t%0, %0";
-
-    case TYPE_MMXMOV:
-      if (get_attr_mode (insn) == MODE_DI)
-	return "movq\t{%1, %0|%0, %1}";
-      return "movd\t{%1, %0|%0, %1}";
-
-    case TYPE_LEA:
-      return "lea{l}\t{%1, %0|%0, %1}";
-
-    default:
-      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
-      return "mov{l}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set (attr "type")
-     (cond [(eq_attr "alternative" "2")
-	      (const_string "mmx")
-	    (eq_attr "alternative" "3,4,5")
-	      (const_string "mmxmov")
-	    (eq_attr "alternative" "6")
-	      (const_string "sselog1")
-	    (eq_attr "alternative" "7,8,9,10,11")
-	      (const_string "ssemov")
- 	    (match_operand:DI 1 "pic_32bit_operand" "")
-	      (const_string "lea")
-	   ]
-	   (const_string "imov")))
-   (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
-       (const_string "orig")
-       (const_string "maybe_vex")))
-   (set (attr "prefix_data16")
-     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "mode")
-     (cond [(eq_attr "alternative" "2,3")
-	      (const_string "DI")
-	    (eq_attr "alternative" "6,7")
-	      (if_then_else
-	        (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-	        (const_string "V4SF")
-	        (const_string "TI"))
-	    (and (eq_attr "alternative" "8,9,10,11")
-	         (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
-	      (const_string "SF")
-	   ]
-	   (const_string "SI")))])
-
-;; Stores and loads of ax to arbitrary constant address.
-;; We fake an second form of instruction to force reload to load address
-;; into register when rax is not available
-(define_insn "*movabssi_1_rex64"
-  [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
-	(match_operand:SI 1 "nonmemory_operand" "a,er"))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
-  "@
-   movabs{l}\t{%1, %P0|%P0, %1}
-   mov{l}\t{%1, %a0|%a0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "memory" "store")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movabssi_2_rex64"
-  [(set (match_operand:SI 0 "register_operand" "=a,r")
-        (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
-  "@
-   movabs{l}\t{%P1, %0|%0, %P1}
-   mov{l}\t{%a1, %0|%0, %a1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0")
-   (set_attr "memory" "load")
-   (set_attr "mode" "SI")])
-
-(define_insn "*swapsi"
-  [(set (match_operand:SI 0 "register_operand" "+r")
-	(match_operand:SI 1 "register_operand" "+r"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  ""
-  "xchg{l}\t%1, %0"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "SI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "double")])
-
-(define_expand "movhi"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-        (match_operand:HI 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (HImode, operands); DONE;")
-
-(define_insn "*pushhi2"
-  [(set (match_operand:HI 0 "push_operand" "=X")
-	(match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
-  "!TARGET_64BIT"
-  "push{l}\t%k1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "SI")])
-
-;; For 64BIT abi we always round up to 8 bytes.
-(define_insn "*pushhi2_rex64"
-  [(set (match_operand:HI 0 "push_operand" "=X")
-	(match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
-  "TARGET_64BIT"
-  "push{q}\t%q1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "DI")])
-
-(define_insn "*movhi_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
-	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      /* movzwl is faster than movw on p2 due to partial word stalls,
-	 though not as fast as an aligned movl.  */
-      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
-    default:
-      if (get_attr_mode (insn) == MODE_SI)
-        return "mov{l}\t{%k1, %k0|%k0, %k1}";
-      else
-        return "mov{w}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set (attr "type")
-     (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
-	      (const_string "imov")
-	    (and (eq_attr "alternative" "0")
-		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
-			  (const_int 0))
-		      (eq (symbol_ref "TARGET_HIMODE_MATH")
-			  (const_int 0))))
-	      (const_string "imov")
-	    (and (eq_attr "alternative" "1,2")
-		 (match_operand:HI 1 "aligned_operand" ""))
-	      (const_string "imov")
-	    (and (ne (symbol_ref "TARGET_MOVX")
-		     (const_int 0))
-		 (eq_attr "alternative" "0,2"))
-	      (const_string "imovx")
-	   ]
-	   (const_string "imov")))
-    (set (attr "mode")
-      (cond [(eq_attr "type" "imovx")
-	       (const_string "SI")
-	     (and (eq_attr "alternative" "1,2")
-		  (match_operand:HI 1 "aligned_operand" ""))
-	       (const_string "SI")
-	     (and (eq_attr "alternative" "0")
-		  (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
-			   (const_int 0))
-		       (eq (symbol_ref "TARGET_HIMODE_MATH")
-			   (const_int 0))))
-	       (const_string "SI")
-	    ]
-	    (const_string "HI")))])
-
-;; Stores and loads of ax to arbitrary constant address.
-;; We fake an second form of instruction to force reload to load address
-;; into register when rax is not available
-(define_insn "*movabshi_1_rex64"
-  [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
-	(match_operand:HI 1 "nonmemory_operand" "a,er"))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
-  "@
-   movabs{w}\t{%1, %P0|%P0, %1}
-   mov{w}\t{%1, %a0|%a0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "memory" "store")
-   (set_attr "mode" "HI")])
-
-(define_insn "*movabshi_2_rex64"
-  [(set (match_operand:HI 0 "register_operand" "=a,r")
-        (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
-  "@
-   movabs{w}\t{%P1, %0|%0, %P1}
-   mov{w}\t{%a1, %0|%0, %a1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0")
-   (set_attr "memory" "load")
-   (set_attr "mode" "HI")])
-
-(define_insn "*swaphi_1"
-  [(set (match_operand:HI 0 "register_operand" "+r")
-	(match_operand:HI 1 "register_operand" "+r"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "xchg{l}\t%k1, %k0"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "SI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "double")])
-
-;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
-(define_insn "*swaphi_2"
-  [(set (match_operand:HI 0 "register_operand" "+r")
-	(match_operand:HI 1 "register_operand" "+r"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "TARGET_PARTIAL_REG_STALL"
-  "xchg{w}\t%1, %0"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "HI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
-
-(define_expand "movstricthi"
-  [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
-	(match_operand:HI 1 "general_operand" ""))]
-  ""
-{
-  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
-    FAIL;
-  /* Don't generate memory->memory moves, go through a register */
-  if (MEM_P (operands[0]) && MEM_P (operands[1]))
-    operands[1] = force_reg (HImode, operands[1]);
-})
-
-(define_insn "*movstricthi_1"
-  [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r"))
-	(match_operand:HI 1 "general_operand" "rn,m"))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "mov{w}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "HI")])
-
-(define_insn "*movstricthi_xor"
-  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
-	(match_operand:HI 1 "const0_operand" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  "xor{w}\t%0, %0"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "HI")
-   (set_attr "length_immediate" "0")])
-
-(define_expand "movqi"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(match_operand:QI 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (QImode, operands); DONE;")
-
-;; emit_push_insn when it calls move_by_pieces requires an insn to
-;; "push a byte".  But actually we use pushl, which has the effect
-;; of rounding the amount pushed up to a word.
-
-(define_insn "*pushqi2"
-  [(set (match_operand:QI 0 "push_operand" "=X")
-	(match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))]
-  "!TARGET_64BIT"
-  "push{l}\t%k1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "SI")])
-
-;; For 64BIT abi we always round up to 8 bytes.
-(define_insn "*pushqi2_rex64"
-  [(set (match_operand:QI 0 "push_operand" "=X")
-	(match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))]
-  "TARGET_64BIT"
-  "push{q}\t%q1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "DI")])
-
-;; Situation is quite tricky about when to choose full sized (SImode) move
-;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
-;; partial register dependency machines (such as AMD Athlon), where QImode
-;; moves issue extra dependency and for partial register stalls machines
-;; that don't use QImode patterns (and QImode move cause stall on the next
-;; instruction).
-;;
-;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
-;; register stall machines with, where we use QImode instructions, since
-;; partial register stall can be caused there.  Then we use movzx.
-(define_insn "*movqi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
-	(match_operand:QI 1 "general_operand"      " q,qn,qm,q,rn,qm,qn"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
-      return "movz{bl|x}\t{%1, %k0|%k0, %1}";
-    default:
-      if (get_attr_mode (insn) == MODE_SI)
-        return "mov{l}\t{%k1, %k0|%k0, %k1}";
-      else
-        return "mov{b}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (eq_attr "alternative" "5")
-		 (not (match_operand:QI 1 "aligned_operand" "")))
-	      (const_string "imovx")
-	    (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
-	      (const_string "imov")
-	    (and (eq_attr "alternative" "3")
-		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
-			  (const_int 0))
-		      (eq (symbol_ref "TARGET_QIMODE_MATH")
-			  (const_int 0))))
-	      (const_string "imov")
-	    (eq_attr "alternative" "3,5")
-	      (const_string "imovx")
-	    (and (ne (symbol_ref "TARGET_MOVX")
-		     (const_int 0))
-		 (eq_attr "alternative" "2"))
-	      (const_string "imovx")
-	   ]
-	   (const_string "imov")))
-   (set (attr "mode")
-      (cond [(eq_attr "alternative" "3,4,5")
-	       (const_string "SI")
-	     (eq_attr "alternative" "6")
-	       (const_string "QI")
-	     (eq_attr "type" "imovx")
-	       (const_string "SI")
-	     (and (eq_attr "type" "imov")
-		  (and (eq_attr "alternative" "0,1")
-		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
-				(const_int 0))
-			    (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
-				     (const_int 0))
-			    	 (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
-				     (const_int 0))))))
-	       (const_string "SI")
-	     ;; Avoid partial register stalls when not using QImode arithmetic
-	     (and (eq_attr "type" "imov")
-		  (and (eq_attr "alternative" "0,1")
-		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
-				(const_int 0))
-			    (eq (symbol_ref "TARGET_QIMODE_MATH")
-				(const_int 0)))))
-	       (const_string "SI")
-	   ]
-	   (const_string "QI")))])
-
-(define_insn "*swapqi_1"
-  [(set (match_operand:QI 0 "register_operand" "+r")
-	(match_operand:QI 1 "register_operand" "+r"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "xchg{l}\t%k1, %k0"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "SI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")])
-
-;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
-(define_insn "*swapqi_2"
-  [(set (match_operand:QI 0 "register_operand" "+q")
-	(match_operand:QI 1 "register_operand" "+q"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "TARGET_PARTIAL_REG_STALL"
-  "xchg{b}\t%1, %0"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
-
-(define_expand "movstrictqi"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
-	(match_operand:QI 1 "general_operand" ""))]
-  ""
-{
-  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
-    FAIL;
-  /* Don't generate memory->memory moves, go through a register.  */
-  if (MEM_P (operands[0]) && MEM_P (operands[1]))
-    operands[1] = force_reg (QImode, operands[1]);
-})
-
-(define_insn "*movstrictqi_1"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
-	(match_operand:QI 1 "general_operand" "*qn,m"))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "mov{b}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")])
-
-(define_insn "*movstrictqi_xor"
-  [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
-	(match_operand:QI 1 "const0_operand" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  "xor{b}\t%0, %0"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "QI")
-   (set_attr "length_immediate" "0")])
-
-(define_insn "*movsi_extv_1"
-  [(set (match_operand:SI 0 "register_operand" "=R")
-	(sign_extract:SI (match_operand 1 "ext_register_operand" "Q")
-			 (const_int 8)
-			 (const_int 8)))]
-  ""
-  "movs{bl|x}\t{%h1, %0|%0, %h1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movhi_extv_1"
-  [(set (match_operand:HI 0 "register_operand" "=R")
-	(sign_extract:HI (match_operand 1 "ext_register_operand" "Q")
-			 (const_int 8)
-			 (const_int 8)))]
-  ""
-  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movqi_extv_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
-        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
-                         (const_int 8)
-                         (const_int 8)))]
-  "!TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
-    default:
-      return "mov{b}\t{%h1, %0|%0, %h1}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (and (match_operand:QI 0 "register_operand" "")
-			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
-			     (ne (symbol_ref "TARGET_MOVX")
-				 (const_int 0))))
-	(const_string "imovx")
-	(const_string "imov")))
-   (set (attr "mode")
-     (if_then_else (eq_attr "type" "imovx")
-	(const_string "SI")
-	(const_string "QI")))])
-
-(define_insn "*movqi_extv_1_rex64"
-  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
-        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
-                         (const_int 8)
-                         (const_int 8)))]
-  "TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
-    default:
-      return "mov{b}\t{%h1, %0|%0, %h1}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (and (match_operand:QI 0 "register_operand" "")
-			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
-			     (ne (symbol_ref "TARGET_MOVX")
-				 (const_int 0))))
-	(const_string "imovx")
-	(const_string "imov")))
-   (set (attr "mode")
-     (if_then_else (eq_attr "type" "imovx")
-	(const_string "SI")
-	(const_string "QI")))])
-
-;; Stores and loads of ax to arbitrary constant address.
-;; We fake an second form of instruction to force reload to load address
-;; into register when rax is not available
-(define_insn "*movabsqi_1_rex64"
-  [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
-	(match_operand:QI 1 "nonmemory_operand" "a,er"))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
-  "@
-   movabs{b}\t{%1, %P0|%P0, %1}
-   mov{b}\t{%1, %a0|%a0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "memory" "store")
-   (set_attr "mode" "QI")])
-
-(define_insn "*movabsqi_2_rex64"
-  [(set (match_operand:QI 0 "register_operand" "=a,r")
-        (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
-  "@
-   movabs{b}\t{%P1, %0|%0, %P1}
-   mov{b}\t{%a1, %0|%0, %a1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0")
-   (set_attr "memory" "load")
-   (set_attr "mode" "QI")])
-
-(define_insn "*movdi_extzv_1"
-  [(set (match_operand:DI 0 "register_operand" "=R")
-	(zero_extract:DI (match_operand 1 "ext_register_operand" "Q")
-			 (const_int 8)
-			 (const_int 8)))]
-  "TARGET_64BIT"
-  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movsi_extzv_1"
-  [(set (match_operand:SI 0 "register_operand" "=R")
-	(zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
-			 (const_int 8)
-			 (const_int 8)))]
-  ""
-  "movz{bl|x}\t{%h1, %0|%0, %h1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-(define_insn "*movqi_extzv_2"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
-        (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
-				    (const_int 8)
-				    (const_int 8)) 0))]
-  "!TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
-    default:
-      return "mov{b}\t{%h1, %0|%0, %h1}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (and (match_operand:QI 0 "register_operand" "")
-			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
-			     (ne (symbol_ref "TARGET_MOVX")
-				 (const_int 0))))
-	(const_string "imovx")
-	(const_string "imov")))
-   (set (attr "mode")
-     (if_then_else (eq_attr "type" "imovx")
-	(const_string "SI")
-	(const_string "QI")))])
-
-(define_insn "*movqi_extzv_2_rex64"
-  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
-        (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
-				    (const_int 8)
-				    (const_int 8)) 0))]
-  "TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOVX:
-      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
-    default:
-      return "mov{b}\t{%h1, %0|%0, %h1}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
-			(ne (symbol_ref "TARGET_MOVX")
-			    (const_int 0)))
-	(const_string "imovx")
-	(const_string "imov")))
-   (set (attr "mode")
-     (if_then_else (eq_attr "type" "imovx")
-	(const_string "SI")
-	(const_string "QI")))])
-
-(define_insn "movsi_insv_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
-			 (const_int 8)
-			 (const_int 8))
-	(match_operand:SI 1 "general_operand" "Qmn"))]
-  "!TARGET_64BIT"
-  "mov{b}\t{%b1, %h0|%h0, %b1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")])
-
-(define_insn "*movsi_insv_1_rex64"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
-			 (const_int 8)
-			 (const_int 8))
-	(match_operand:SI 1 "nonmemory_operand" "Qn"))]
-  "TARGET_64BIT"
-  "mov{b}\t{%b1, %h0|%h0, %b1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")])
-
-(define_insn "movdi_insv_1_rex64"
-  [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q")
-			 (const_int 8)
-			 (const_int 8))
-	(match_operand:DI 1 "nonmemory_operand" "Qn"))]
-  "TARGET_64BIT"
-  "mov{b}\t{%b1, %h0|%h0, %b1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")])
-
-(define_insn "*movqi_insv_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
-			 (const_int 8)
-			 (const_int 8))
-	(lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
-		     (const_int 8)))]
-  ""
-  "mov{b}\t{%h1, %h0|%h0, %h1}"
-  [(set_attr "type" "imov")
-   (set_attr "mode" "QI")])
-
-(define_expand "movdi"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "")
-	(match_operand:DI 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (DImode, operands); DONE;")
-
-(define_insn "*pushdi"
-  [(set (match_operand:DI 0 "push_operand" "=<")
-	(match_operand:DI 1 "general_no_elim_operand" "riF*m"))]
-  "!TARGET_64BIT"
+;; Push/pop instructions.
+
+(define_insn "*push<mode>2"
+  [(set (match_operand:DWI 0 "push_operand" "=<")
+	(match_operand:DWI 1 "general_no_elim_operand" "riF*m"))]
+  ""
   "#")
 
+(define_split
+  [(set (match_operand:TI 0 "push_operand" "")
+        (match_operand:TI 1 "general_operand" ""))]
+  "TARGET_64BIT && reload_completed
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_insn "*pushdi2_rex64"
   [(set (match_operand:DI 0 "push_operand" "=<,!<")
 	(match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
@@ -2313,8 +1655,7 @@
   "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 ;; We need to define this as both peepholer and splitter for case
 ;; peephole2 pass is not run.
@@ -2326,11 +1667,13 @@
    && !x86_64_immediate_operand (operands[1], DImode) && 1"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (&operands[1], 1, &operands[2], &operands[3]);
-   operands[1] = gen_lowpart (DImode, operands[2]);
-   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
-						    GEN_INT (4)));
-  ")
+{
+  split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
+
+  operands[1] = gen_lowpart (DImode, operands[2]);
+  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+						   GEN_INT (4)));
+})
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
@@ -2341,114 +1684,281 @@
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (&operands[1], 1, &operands[2], &operands[3]);
-   operands[1] = gen_lowpart (DImode, operands[2]);
-   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
-						    GEN_INT (4)));
-  ")
-
-(define_insn "*pushdi2_prologue_rex64"
-  [(set (match_operand:DI 0 "push_operand" "=<")
-	(match_operand:DI 1 "general_no_elim_operand" "re*m"))
-   (clobber (mem:BLK (scratch)))]
-  "TARGET_64BIT"
-  "push{q}\t%1"
-  [(set_attr "type" "push")
-   (set_attr "mode" "DI")])
-
-(define_insn "*popdi1_epilogue_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
-	(mem:DI (reg:DI SP_REG)))
-   (set (reg:DI SP_REG)
-	(plus:DI (reg:DI SP_REG) (const_int 8)))
-   (clobber (mem:BLK (scratch)))]
-  "TARGET_64BIT"
-  "pop{q}\t%0"
-  [(set_attr "type" "pop")
-   (set_attr "mode" "DI")])
-
-(define_insn "popdi1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
-	(mem:DI (reg:DI SP_REG)))
-   (set (reg:DI SP_REG)
-	(plus:DI (reg:DI SP_REG) (const_int 8)))]
-  "TARGET_64BIT"
-  "pop{q}\t%0"
-  [(set_attr "type" "pop")
-   (set_attr "mode" "DI")])
-
-(define_insn "*movdi_xor_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(match_operand:DI 1 "const0_operand" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && reload_completed"
-  "xor{l}\t%k0, %k0";
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "SI")
-   (set_attr "length_immediate" "0")])
-
-(define_insn "*movdi_or_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(match_operand:DI 1 "const_int_operand" "i"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && reload_completed
-   && operands[1] == constm1_rtx"
-{
-  operands[1] = constm1_rtx;
-  return "or{q}\t{%1, %0|%0, %1}";
-}
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "DI")
-   (set_attr "length_immediate" "1")])
-
-(define_insn "*movdi_2"
-  [(set (match_operand:DI 0 "nonimmediate_operand"
-			"=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
-	(match_operand:DI 1 "general_operand"
-			"riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  ,C ,*x,*x,m "))]
-  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   #
-   #
-   pxor\t%0, %0
-   movq\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
-   %vpxor\t%0, %d0
-   %vmovq\t{%1, %0|%0, %1}
-   %vmovdqa\t{%1, %0|%0, %1}
-   %vmovq\t{%1, %0|%0, %1}
-   xorps\t%0, %0
-   movlps\t{%1, %0|%0, %1}
-   movaps\t{%1, %0|%0, %1}
-   movlps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
-   (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "5,6,7,8")
-       (const_string "vex")
-       (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+{
+  split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
+
+  operands[1] = gen_lowpart (DImode, operands[2]);
+  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+						   GEN_INT (4)));
+})
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
   "!TARGET_64BIT && reload_completed
-   && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
-;; %%% This multiword shite has got to go.
-(define_split
-  [(set (match_operand:DI 0 "nonimmediate_operand" "")
-        (match_operand:DI 1 "general_operand" ""))]
-  "!TARGET_64BIT && reload_completed
-   && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
-   && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+(define_insn "*pushsi2"
+  [(set (match_operand:SI 0 "push_operand" "=<")
+	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+  "!TARGET_64BIT"
+  "push{l}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+;; emit_push_insn when it calls move_by_pieces requires an insn to
+;; "push a byte/word".  But actually we use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
+
+;; For TARGET_64BIT we always round up to 8 bytes.
+(define_insn "*push<mode>2_rex64"
+  [(set (match_operand:SWI124 0 "push_operand" "=X")
+	(match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))]
+  "TARGET_64BIT"
+  "push{q}\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
+(define_insn "*push<mode>2"
+  [(set (match_operand:SWI12 0 "push_operand" "=X")
+	(match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
+  "!TARGET_64BIT"
+  "push{l}\t%k1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+(define_insn "*push<mode>2_prologue"
+  [(set (match_operand:P 0 "push_operand" "=<")
+	(match_operand:P 1 "general_no_elim_operand" "r<i>*m"))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "push{<imodesuffix>}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1"
+  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+	(match_operand:P 1 "pop_operand" ">"))]
+  ""
+  "pop{<imodesuffix>}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1_epilogue"
+  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+	(match_operand:P 1 "pop_operand" ">"))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "pop{<imodesuffix>}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "<MODE>")])
+
+;; Move instructions.
+
+(define_expand "movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "")
+	(match_operand:OI 1 "general_operand" ""))]
+  "TARGET_AVX"
+  "ix86_expand_move (OImode, operands); DONE;")
+
+(define_expand "movti"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(match_operand:TI 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT || TARGET_SSE"
+{
+  if (TARGET_64BIT)
+    ix86_expand_move (TImode, operands);
+  else if (push_operand (operands[0], TImode))
+    ix86_expand_push (TImode, operands[1]);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
+})
+
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern.  Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, but doesn't seem to be harmful
+;; to have around all the time.
+(define_expand "movcdi"
+  [(set (match_operand:CDI 0 "nonimmediate_operand" "")
+	(match_operand:CDI 1 "general_operand" ""))]
+  ""
+{
+  if (push_operand (operands[0], CDImode))
+    emit_move_complex_push (CDImode, operands[0], operands[1]);
+  else
+    emit_move_complex_parts (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "mov<mode>"
+  [(set (match_operand:SWI1248x 0 "nonimmediate_operand" "")
+	(match_operand:SWI1248x 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (<MODE>mode, operands); DONE;")
+
+(define_insn "*mov<mode>_xor"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(match_operand:SWI48 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{l}\t%k0, %k0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>_or"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(match_operand:SWI48 1 "const_int_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && operands[1] == constm1_rtx"
+  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "1")])
+
+(define_insn "*movoi_internal_avx"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "vxorps\t%0, %0, %0";
+    case 1:
+    case 2:
+      if (misaligned_operand (operands[0], OImode)
+	  || misaligned_operand (operands[1], OImode))
+	return "vmovdqu\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
+(define_insn "*movti_internal_rex64"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
+	(match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 3:
+    case 4:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovups\t{%1, %0|%0, %1}";
+	 else
+	   return "%vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	 else
+	   return "%vmovdqa\t{%1, %0|%0, %1}";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+   (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
+   (set (attr "mode")
+   	(cond [(eq_attr "alternative" "2,3")
+		 (if_then_else
+		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		       (const_int 0))
+		   (const_string "V4SF")
+		   (const_string "TI"))
+	       (eq_attr "alternative" "4")
+		 (if_then_else
+		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			    (const_int 0))
+			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "V4SF")
+		   (const_string "TI"))]
+	       (const_string "DI")))])
+
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(match_operand:TI 1 "general_operand" ""))]
+  "reload_completed
+   && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
-(define_insn "*movdi_1_rex64"
+(define_insn "*movti_internal_sse"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !TARGET_64BIT
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 1:
+    case 2:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovups\t{%1, %0|%0, %1}";
+	 else
+	   return "%vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	 else
+	   return "%vmovdqa\t{%1, %0|%0, %1}";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		    (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			(const_int 0)))
+		 (const_string "V4SF")
+	       (and (eq_attr "alternative" "2")
+		    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			(const_int 0)))
+		 (const_string "V4SF")]
+	      (const_string "TI")))])
+
+(define_insn "*movdi_internal_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
 	  "=r,r  ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
 	(match_operand:DI 1 "general_operand"
@@ -2540,37 +2050,6 @@
        (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
 
-;; Stores and loads of ax to arbitrary constant address.
-;; We fake an second form of instruction to force reload to load address
-;; into register when rax is not available
-(define_insn "*movabsdi_1_rex64"
-  [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
-	(match_operand:DI 1 "nonmemory_operand" "a,er"))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
-  "@
-   movabs{q}\t{%1, %P0|%P0, %1}
-   mov{q}\t{%1, %a0|%a0, %1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "memory" "store")
-   (set_attr "mode" "DI")])
-
-(define_insn "*movabsdi_2_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=a,r")
-        (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
-  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
-  "@
-   movabs{q}\t{%P1, %0|%0, %P1}
-   mov{q}\t{%a1, %0|%0, %a1}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0,*")
-   (set_attr "length_address" "8,0")
-   (set_attr "length_immediate" "0")
-   (set_attr "memory" "load")
-   (set_attr "mode" "DI")])
-
 ;; Convert impossible stores of immediate to existing instructions.
 ;; First try to get scratch register and go through it.  In case this
 ;; fails, move by 32bit parts.
@@ -2581,8 +2060,7 @@
   "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 ;; We need to define this as both peepholer and splitter for case
 ;; peephole2 pass is not run.
@@ -2594,7 +2072,7 @@
    && !x86_64_immediate_operand (operands[1], DImode) && 1"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
-  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+  "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);")
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
@@ -2605,201 +2083,654 @@
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
-  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
-
-(define_insn "*swapdi_rex64"
-  [(set (match_operand:DI 0 "register_operand" "+r")
-	(match_operand:DI 1 "register_operand" "+r"))
+  "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);")
+
+(define_insn "*movdi_internal"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+			"=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
+	(match_operand:DI 1 "general_operand"
+			"riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  ,C ,*x,*x,m "))]
+  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   #
+   #
+   pxor\t%0, %0
+   movq\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
+   xorps\t%0, %0
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+        (match_operand:DI 1 "general_operand" ""))]
+  "!TARGET_64BIT && reload_completed
+   && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
+   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movsi_internal"
+  [(set (match_operand:SI 0 "nonimmediate_operand"
+			"=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+	(match_operand:SI 1 "general_operand"
+			"g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r   ,m "))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_SSELOG1:
+      if (get_attr_mode (insn) == MODE_TI)
+        return "%vpxor\t%0, %d0";
+      return "%vxorps\t%0, %d0";
+
+    case TYPE_SSEMOV:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_TI:
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
+	case MODE_V4SF:
+	  return "%vmovaps\t{%1, %0|%0, %1}";
+	case MODE_SI:
+          return "%vmovd\t{%1, %0|%0, %1}";
+	case MODE_SF:
+          return "%vmovss\t{%1, %0|%0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case TYPE_MMX:
+      return "pxor\t%0, %0";
+
+    case TYPE_MMXMOV:
+      if (get_attr_mode (insn) == MODE_DI)
+	return "movq\t{%1, %0|%0, %1}";
+      return "movd\t{%1, %0|%0, %1}";
+
+    case TYPE_LEA:
+      return "lea{l}\t{%a1, %0|%0, %a1}";
+
+    default:
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+      return "mov{l}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "mmx")
+	    (eq_attr "alternative" "3,4,5")
+	      (const_string "mmxmov")
+	    (eq_attr "alternative" "6")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "7,8,9,10,11")
+	      (const_string "ssemov")
+ 	    (match_operand:DI 1 "pic_32bit_operand" "")
+	      (const_string "lea")
+	   ]
+	   (const_string "imov")))
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+       (const_string "orig")
+       (const_string "maybe_vex")))
+   (set (attr "prefix_data16")
+     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "2,3")
+	      (const_string "DI")
+	    (eq_attr "alternative" "6,7")
+	      (if_then_else
+	        (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+	        (const_string "V4SF")
+	        (const_string "TI"))
+	    (and (eq_attr "alternative" "8,9,10,11")
+	         (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+	      (const_string "SF")
+	   ]
+	   (const_string "SI")))])
+
+(define_insn "*movhi_internal"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      /* movzwl is faster than movw on p2 due to partial word stalls,
+	 though not as fast as an aligned movl.  */
+      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+        return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+        return "mov{w}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		(const_int 0))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "0")
+		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			  (const_int 0))
+		      (eq (symbol_ref "TARGET_HIMODE_MATH")
+			  (const_int 0))))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "1,2")
+		 (match_operand:HI 1 "aligned_operand" ""))
+	      (const_string "imov")
+	    (and (ne (symbol_ref "TARGET_MOVX")
+		     (const_int 0))
+		 (eq_attr "alternative" "0,2"))
+	      (const_string "imovx")
+	   ]
+	   (const_string "imov")))
+    (set (attr "mode")
+      (cond [(eq_attr "type" "imovx")
+	       (const_string "SI")
+	     (and (eq_attr "alternative" "1,2")
+		  (match_operand:HI 1 "aligned_operand" ""))
+	       (const_string "SI")
+	     (and (eq_attr "alternative" "0")
+		  (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			   (const_int 0))
+		       (eq (symbol_ref "TARGET_HIMODE_MATH")
+			   (const_int 0))))
+	       (const_string "SI")
+	    ]
+	    (const_string "HI")))])
+
+;; Situation is quite tricky about when to choose full sized (SImode) move
+;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
+;; partial register dependency machines (such as AMD Athlon), where QImode
+;; moves issue extra dependency and for partial register stalls machines
+;; that don't use QImode patterns (and QImode move cause stall on the next
+;; instruction).
+;;
+;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
+;; register stall machines with, where we use QImode instructions, since
+;; partial register stall can be caused there.  Then we use movzx.
+(define_insn "*movqi_internal"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+	(match_operand:QI 1 "general_operand"      " q,qn,qm,q,rn,qm,qn"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
+      return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+        return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+        return "mov{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (eq_attr "alternative" "5")
+		 (not (match_operand:QI 1 "aligned_operand" "")))
+	      (const_string "imovx")
+	    (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		(const_int 0))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "3")
+		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			  (const_int 0))
+		      (eq (symbol_ref "TARGET_QIMODE_MATH")
+			  (const_int 0))))
+	      (const_string "imov")
+	    (eq_attr "alternative" "3,5")
+	      (const_string "imovx")
+	    (and (ne (symbol_ref "TARGET_MOVX")
+		     (const_int 0))
+		 (eq_attr "alternative" "2"))
+	      (const_string "imovx")
+	   ]
+	   (const_string "imov")))
+   (set (attr "mode")
+      (cond [(eq_attr "alternative" "3,4,5")
+	       (const_string "SI")
+	     (eq_attr "alternative" "6")
+	       (const_string "QI")
+	     (eq_attr "type" "imovx")
+	       (const_string "SI")
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "0,1")
+		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+				(const_int 0))
+			    (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+				     (const_int 0))
+				 (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+				     (const_int 0))))))
+	       (const_string "SI")
+	     ;; Avoid partial register stalls when not using QImode arithmetic
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "0,1")
+		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+				(const_int 0))
+			    (eq (symbol_ref "TARGET_QIMODE_MATH")
+				(const_int 0)))))
+	       (const_string "SI")
+	   ]
+	   (const_string "QI")))])
+
+;; Stores and loads of ax to arbitrary constant address.
+;; We fake an second form of instruction to force reload to load address
+;; into register when rax is not available
+(define_insn "*movabs<mode>_1"
+  [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:SWI1248x 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{<imodesuffix>}\t{%1, %P0|%P0, %1}
+   mov{<imodesuffix>}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*movabs<mode>_2"
+  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
+        (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{<imodesuffix>}\t{%P1, %0|%0, %P1}
+   mov{<imodesuffix>}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*swap<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "+r")
+	(match_operand:SWI48 1 "register_operand" "+r"))
    (set (match_dup 1)
 	(match_dup 0))]
-  "TARGET_64BIT"
-  "xchg{q}\t%1, %0"
+  ""
+  "xchg{<imodesuffix>}\t%1, %0"
   [(set_attr "type" "imov")
-   (set_attr "mode" "DI")
+   (set_attr "mode" "<MODE>")
    (set_attr "pent_pair" "np")
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "double")])
-
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand" "")
-	(match_operand:OI 1 "general_operand" ""))]
-  "TARGET_AVX"
-  "ix86_expand_move (OImode, operands); DONE;")
-
-(define_insn "*movoi_internal"
-  [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
-	(match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_AVX
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return "vxorps\t%0, %0, %0";
-    case 1:
-    case 2:
-      if (misaligned_operand (operands[0], OImode)
-	  || misaligned_operand (operands[1], OImode))
-	return "vmovdqu\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqa\t{%1, %0|%0, %1}";
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "sselog1,ssemov,ssemov")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
-
-(define_expand "movti"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-	(match_operand:TI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE || TARGET_64BIT"
-{
-  if (TARGET_64BIT)
-    ix86_expand_move (TImode, operands);
-  else if (push_operand (operands[0], TImode))
-    ix86_expand_push (TImode, operands[1]);
-  else
-    ix86_expand_vector_move (TImode, operands);
-  DONE;
-})
-
-(define_insn "*movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
-	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE && !TARGET_64BIT
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")])
+
+(define_insn "*swap<mode>_1"
+  [(set (match_operand:SWI12 0 "register_operand" "+r")
+	(match_operand:SWI12 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "xchg{l}\t%k1, %k0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")])
+
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL
+;; is disabled for AMDFAM10
+(define_insn "*swap<mode>_2"
+  [(set (match_operand:SWI12 0 "register_operand" "+<r>")
+	(match_operand:SWI12 1 "register_operand" "+<r>"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_PARTIAL_REG_STALL"
+  "xchg{<imodesuffix>}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrict<mode>"
+  [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand" ""))
+	(match_operand:SWI12 1 "general_operand" ""))]
+  ""
+{
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
+  /* Don't generate memory->memory moves, go through a register */
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*movstrict<mode>_1"
+  [(set (strict_low_part
+	  (match_operand:SWI12 0 "nonimmediate_operand" "+<r>m,<r>"))
+	(match_operand:SWI12 1 "general_operand" "<r>n,m"))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vxorps\t%0, %d0";
-      else
-	return "%vpxor\t%0, %d0";
-    case 1:
-    case 2:
-      /* TDmode values are passed as TImode on the stack.  Moving them
-	 to stack may result in unaligned memory access.  */
-      if (misaligned_operand (operands[0], TImode)
-	  || misaligned_operand (operands[1], TImode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	 else
-	   return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	 else
-	   return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "sselog1,ssemov,ssemov")
-   (set_attr "prefix" "maybe_vex")
+  "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*movstrict<mode>_xor"
+  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
+	(match_operand:SWI12 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{<imodesuffix>}\t%0, %0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>_extv_1"
+  [(set (match_operand:SWI24 0 "register_operand" "=R")
+	(sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q")
+			    (const_int 8)
+			    (const_int 8)))]
+  ""
+  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extv_1_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+                         (const_int 8)
+                         (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extv_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
+        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+                         (const_int 8)
+                         (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
    (set (attr "mode")
-	(cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-		    (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)))
-		 (const_string "V4SF")
-	       (and (eq_attr "alternative" "2")
-		    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-			(const_int 0)))
-		 (const_string "V4SF")]
-	      (const_string "TI")))])
-
-(define_insn "*movti_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
-	(match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vxorps\t%0, %d0";
-      else
-	return "%vpxor\t%0, %d0";
-    case 3:
-    case 4:
-      /* TDmode values are passed as TImode on the stack.  Moving them
-	 to stack may result in unaligned memory access.  */
-      if (misaligned_operand (operands[0], TImode)
-	  || misaligned_operand (operands[1], TImode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	 else
-	   return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	 else
-	   return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
-   (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*mov<mode>_extzv_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=R")
+	(zero_extract:SWI48 (match_operand 1 "ext_register_operand" "Q")
+			    (const_int 8)
+			    (const_int 8)))]
+  ""
+  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extzv_2_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+        (subreg:QI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+			   (const_int 8)
+			   (const_int 8)) 0))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_2"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+        (subreg:QI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+			   (const_int 8)
+			   (const_int 8)) 0))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
    (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-		 (if_then_else
-		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
-		       (const_int 0))
-		   (const_string "V4SF")
-		   (const_string "TI"))
-	       (eq_attr "alternative" "4")
-		 (if_then_else
-		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-			    (const_int 0))
-			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-			    (const_int 0)))
-		   (const_string "V4SF")
-		   (const_string "TI"))]
-	       (const_string "DI")))])
-
-(define_split
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-        (match_operand:TI 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_expand "mov<mode>_insv_1"
+  [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "")
+			    (const_int 8)
+			    (const_int 8))
+	(match_operand:SWI48 1 "nonmemory_operand" ""))])
+
+(define_insn "*mov<mode>_insv_1_rex64"
+  [(set (zero_extract:SWI48x (match_operand 0 "ext_register_operand" "+Q")
+			     (const_int 8)
+			     (const_int 8))
+	(match_operand:SWI48x 1 "nonmemory_operand" "Qn"))]
+  "TARGET_64BIT"
+  "mov{b}\t{%b1, %h0|%h0, %b1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movsi_insv_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(match_operand:SI 1 "general_operand" "Qmn"))]
+  "!TARGET_64BIT"
+  "mov{b}\t{%b1, %h0|%h0, %b1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movqi_insv_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
+		     (const_int 8)))]
+  ""
+  "mov{b}\t{%h1, %h0|%h0, %h1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+;; Floating point push instructions.
+
+(define_insn "*pushtf"
+  [(set (match_operand:TF 0 "push_operand" "=<,<,<")
+	(match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
+  "TARGET_SSE2"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "sse,*,*")
+   (set_attr "mode" "TF,SI,SI")])
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+	(match_operand:TF 1 "sse_reg_operand" ""))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
+   (set (mem:TF (reg:P SP_REG)) (match_dup 1))])
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+	(match_operand:TF 1 "general_operand" ""))]
+  "TARGET_SSE2 && reload_completed
    && !SSE_REG_P (operands[1])"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
-;; This expands to what emit_move_complex would generate if we didn't
-;; have a movti pattern.  Having this avoids problems with reload on
-;; 32-bit targets when SSE is present, but doesn't seem to be harmful
-;; to have around all the time.
-(define_expand "movcdi"
-  [(set (match_operand:CDI 0 "nonimmediate_operand" "")
-	(match_operand:CDI 1 "general_operand" ""))]
-  ""
-{
-  if (push_operand (operands[0], CDImode))
-    emit_move_complex_push (CDImode, operands[0], operands[1]);
-  else
-    emit_move_complex_parts (operands[0], operands[1]);
-  DONE;
-})
-
-(define_expand "movsf"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "")
-	(match_operand:SF 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (SFmode, operands); DONE;")
+(define_insn "*pushxf"
+  [(set (match_operand:XF 0 "push_operand" "=<,<")
+	(match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
+  "optimize_function_for_speed_p (cfun)"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*")
+   (set_attr "mode" "XF,SI")])
+
+;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushxf using integer instructions is 3+3*memory operand size
+;; Pushing using integer instructions is longer except for constants
+;; and direct memory references (assuming that any given constant is pushed
+;; only once, but this ought to be handled elsewhere).
+
+(define_insn "*pushxf_nointeger"
+  [(set (match_operand:XF 0 "push_operand" "=X,X,X")
+	(match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+  "optimize_function_for_size_p (cfun)"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "XF,SI,SI")])
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(match_operand:XF 1 "fp_register_operand" ""))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(match_operand:XF 1 "general_operand" ""))]
+  "reload_completed
+   && !FP_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushdf"
+  [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+	(match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
+  "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "DF,SI,DF")])
+
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2+2*memory operand size
+;; On the average, pushdf using integers can be still shorter.  Allow this
+;; pattern for optimize_size too.
+
+(define_insn "*pushdf_nointeger"
+  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+	(match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
+  "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*,*")
+   (set_attr "mode" "DF,SI,SI,DF")])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(match_operand:DF 1 "any_fp_register_operand" ""))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (match_dup 1))])
+
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(match_operand:DF 1 "general_operand" ""))]
+  "reload_completed
+   && !ANY_FP_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushsf_rex64"
+  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
+  "TARGET_64BIT"
+{
+  /* Anything else should be already split before reg-stack.  */
+  gcc_assert (which_alternative == 1);
+  return "push{q}\t%q1";
+}
+  [(set_attr "type" "multi,push,multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "SF,DI,SF")])
 
 (define_insn "*pushsf"
   [(set (match_operand:SF 0 "push_operand" "=<,<,<")
@@ -2814,19 +2745,6 @@
    (set_attr "unit" "i387,*,*")
    (set_attr "mode" "SF,SI,SF")])
 
-(define_insn "*pushsf_rex64"
-  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
-	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
-  "TARGET_64BIT"
-{
-  /* Anything else should be already split before reg-stack.  */
-  gcc_assert (which_alternative == 1);
-  return "push{q}\t%q1";
-}
-  [(set_attr "type" "multi,push,multi")
-   (set_attr "unit" "i387,*,*")
-   (set_attr "mode" "SF,DI,SF")])
-
 (define_split
   [(set (match_operand:SF 0 "push_operand" "")
 	(match_operand:SF 1 "memory_operand" ""))]
@@ -2840,29 +2758,165 @@
 (define_split
   [(set (match_operand:SF 0 "push_operand" "")
 	(match_operand:SF 1 "any_fp_register_operand" ""))]
-  "!TARGET_64BIT"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
-   (set (mem:SF (reg:SI SP_REG)) (match_dup 1))])
-
-(define_split
-  [(set (match_operand:SF 0 "push_operand" "")
-	(match_operand:SF 1 "any_fp_register_operand" ""))]
-  "TARGET_64BIT"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
-   (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
-
-(define_insn "*movsf_1"
-  [(set (match_operand:SF 0 "nonimmediate_operand"
-	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
-	(match_operand:SF 1 "general_operand"
-	  "fm,f,G,rmF,Fr,C,x,xm,x,m  ,*y,*y ,r  ,Yi,r   ,*Ym"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:SF (reg:P SP_REG)) (match_dup 1))]
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (<MODE>mode));")
+
+;; Floating point move instructions.
+
+(define_expand "movtf"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+	(match_operand:TF 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE2"
+{
+  ix86_expand_move (TFmode, operands);
+  DONE;
+})
+
+(define_expand "mov<mode>"
+  [(set (match_operand:X87MODEF 0 "nonimmediate_operand" "")
+	(match_operand:X87MODEF 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (<MODE>mode, operands); DONE;")
+
+(define_insn "*movtf_internal"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
+	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  "TARGET_SSE2
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vmovaps\t{%1, %0|%0, %1}";
+      else
+	return "%vmovdqa\t{%1, %0|%0, %1}";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 3:
+    case 4:
+	return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,2")
+		 (if_then_else
+		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		       (const_int 0))
+		   (const_string "V4SF")
+		   (const_string "TI"))
+	       (eq_attr "alternative" "1")
+		 (if_then_else
+		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			    (const_int 0))
+			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "V4SF")
+		   (const_string "TI"))]
+	       (const_string "DI")))])
+
+(define_split
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+        (match_operand:TF 1 "general_operand" ""))]
+  "reload_completed
+   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movxf_internal"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+	(match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
+  "optimize_function_for_speed_p (cfun)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], XFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3: case 4:
+      return "#";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+   (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+;; Do not use integer registers when optimizing for size
+(define_insn "*movxf_internal_nointeger"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
+	(match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
+  "optimize_function_for_size_p (cfun)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || standard_80387_constant_p (operands[1])
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], XFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3: case 4:
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+   (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_split
+  [(set (match_operand:XF 0 "nonimmediate_operand" "")
+	(match_operand:XF 1 "general_operand" ""))]
+  "reload_completed
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && ! (FP_REG_P (operands[0]) ||
+	 (GET_CODE (operands[0]) == SUBREG
+	  && FP_REG_P (SUBREG_REG (operands[0]))))
+   && ! (FP_REG_P (operands[1]) ||
+	 (GET_CODE (operands[1]) == SUBREG
+	  && FP_REG_P (SUBREG_REG (operands[1]))))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movdf_internal_rex64"
+  [(set (match_operand:DF 0 "nonimmediate_operand"
+		"=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+	(match_operand:DF 1 "general_operand"
+		"fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
 	   && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], SFmode))"
+       || memory_operand (operands[0], DFmode))"
 {
   switch (which_alternative)
     {
@@ -2875,149 +2929,272 @@
 
     case 3:
     case 4:
-      return "mov{l}\t{%1, %0|%0, %1}";
+      return "#";
+
     case 5:
-      if (get_attr_mode (insn) == MODE_TI)
-	return "%vpxor\t%0, %d0";
-      else
-	return "%vxorps\t%0, %d0";
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vxorps\t%0, %d0";
+	case MODE_V2DF:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vxorps\t%0, %d0";
+	  else
+	    return "%vxorpd\t%0, %d0";
+	case MODE_TI:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vxorps\t%0, %d0";
+	  else
+	    return "%vpxor\t%0, %d0";
+	default:
+	  gcc_unreachable ();
+	}
     case 6:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vmovaps\t{%1, %0|%0, %1}";
-      else
-	return "%vmovss\t{%1, %d0|%d0, %1}";
     case 7:
-      if (TARGET_AVX)
-	return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
-				   : "vmovss\t{%1, %0|%0, %1}";
-      else
-	return "movss\t{%1, %0|%0, %1}";
     case 8:
-      return "%vmovss\t{%1, %0|%0, %1}";
-
-    case 9: case 10: case 14: case 15:
-      return "movd\t{%1, %0|%0, %1}";
-    case 12: case 13:
-      return "%vmovd\t{%1, %0|%0, %1}";
-
-    case 11:
-      return "movq\t{%1, %0|%0, %1}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vmovaps\t{%1, %0|%0, %1}";
+	case MODE_V2DF:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovapd\t{%1, %0|%0, %1}";
+	case MODE_TI:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqa\t{%1, %0|%0, %1}";
+	case MODE_DI:
+	  return "%vmovq\t{%1, %0|%0, %1}";
+	case MODE_DF:
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
+	case MODE_V1DF:
+	  return "%vmovlpd\t{%1, %d0|%d0, %1}";
+	case MODE_V2SF:
+	  return "%vmovlps\t{%1, %d0|%d0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case 9:
+    case 10:
+    return "%vmovd\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
-       (const_string "maybe_vex")
-       (const_string "orig")))
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
+   (set (attr "prefix_data16")
+     (if_then_else (eq_attr "mode" "V1DF")
+       (const_string "1")
+       (const_string "*")))
    (set (attr "mode")
-        (cond [(eq_attr "alternative" "3,4,9,10")
-		 (const_string "SI")
+        (cond [(eq_attr "alternative" "0,1,2")
+		 (const_string "DF")
+	       (eq_attr "alternative" "3,4,9,10")
+		 (const_string "DI")
+
+	       /* For SSE1, we have many fewer alternatives.  */
+	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		 (cond [(eq_attr "alternative" "5,6")
+			  (const_string "V4SF")
+		       ]
+		   (const_string "V2SF"))
+
+	       /* xorps is one byte shorter.  */
 	       (eq_attr "alternative" "5")
-		 (if_then_else
-		   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-			    	 (const_int 0))
-			     (ne (symbol_ref "TARGET_SSE2")
-				 (const_int 0)))
-			(eq (symbol_ref "optimize_function_for_size_p (cfun)")
-			    (const_int 0)))
-		   (const_string "TI")
-		   (const_string "V4SF"))
-	       /* For architectures resolving dependencies on
-		  whole SSE registers use APS move to break dependency
-		  chains, otherwise use short move to avoid extra work.
-
-		  Do the same for architectures resolving dependencies on
-		  the parts.  While in DF mode it is better to always handle
-		  just register parts, the SF mode is different due to lack
-		  of instructions to load just part of the register.  It is
-		  better to maintain the whole registers in single format
-		  to avoid problems on using packed logical operations.  */
-	       (eq_attr "alternative" "6")
-		 (if_then_else
-		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0))
+			  (const_string "V4SF")
+			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
 			    (const_int 0))
-			(ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
-			    (const_int 0)))
-		   (const_string "V4SF")
-		   (const_string "SF"))
-	       (eq_attr "alternative" "11")
-		 (const_string "DI")]
-	       (const_string "SF")))])
-
-(define_insn "*swapsf"
-  [(set (match_operand:SF 0 "fp_register_operand" "+f")
-	(match_operand:SF 1 "fp_register_operand" "+f"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "reload_completed || TARGET_80387"
-{
-  if (STACK_TOP_P (operands[0]))
-    return "fxch\t%1";
-  else
-    return "fxch\t%0";
-}
-  [(set_attr "type" "fxch")
-   (set_attr "mode" "SF")])
-
-(define_expand "movdf"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (DFmode, operands); DONE;")
-
-;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
-;; Size of pushdf using integer instructions is 2+2*memory operand size
-;; On the average, pushdf using integers can be still shorter.  Allow this
-;; pattern for optimize_size too.
-
-(define_insn "*pushdf_nointeger"
-  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-	(match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
-  "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*,*,*")
-   (set_attr "mode" "DF,SI,SI,DF")])
-
-(define_insn "*pushdf_integer"
-  [(set (match_operand:DF 0 "push_operand" "=<,<,<")
-	(match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
-  "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*,*")
-   (set_attr "mode" "DF,SI,DF")])
-
-;; %%% Kill this when call knows how to work this out.
-(define_split
-  [(set (match_operand:DF 0 "push_operand" "")
-	(match_operand:DF 1 "any_fp_register_operand" ""))]
-  "reload_completed"
-  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
-   (set (mem:DF (reg:P SP_REG)) (match_dup 1))]
-  "")
-
-(define_split
-  [(set (match_operand:DF 0 "push_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  "reload_completed"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
+			  (const_string "TI")
+		       ]
+		       (const_string "V2DF"))
+
+	       /* For architectures resolving dependencies on
+		  whole SSE registers use APD move to break dependency
+		  chains, otherwise use short move to avoid extra work.
+
+		  movaps encodes one byte shorter.  */
+	       (eq_attr "alternative" "6")
+		 (cond
+		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		        (const_int 0))
+		      (const_string "V4SF")
+		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		        (const_int 0))
+		      (const_string "V2DF")
+		   ]
+		   (const_string "DF"))
+	       /* For architectures resolving dependencies on register
+		  parts we may avoid extra work to zero out upper part
+		  of register.  */
+	       (eq_attr "alternative" "7")
+		 (if_then_else
+		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+		       (const_int 0))
+		   (const_string "V1DF")
+		   (const_string "DF"))
+	      ]
+	      (const_string "DF")))])
+
+(define_insn "*movdf_internal"
+  [(set (match_operand:DF 0 "nonimmediate_operand"
+		"=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
+	(match_operand:DF 1 "general_operand"
+		"fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && optimize_function_for_speed_p (cfun)
+   && TARGET_INTEGER_DFMODE_MOVES
+   && (reload_in_progress || reload_completed
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]))
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], DFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3:
+    case 4:
+      return "#";
+
+    case 5:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "xorps\t%0, %0";
+	case MODE_V2DF:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "xorps\t%0, %0";
+	  else
+	    return "xorpd\t%0, %0";
+	case MODE_TI:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "xorps\t%0, %0";
+	  else
+	    return "pxor\t%0, %0";
+	default:
+	  gcc_unreachable ();
+	}
+    case 6:
+    case 7:
+    case 8:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "movaps\t{%1, %0|%0, %1}";
+	case MODE_V2DF:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movapd\t{%1, %0|%0, %1}";
+	case MODE_TI:
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqa\t{%1, %0|%0, %1}";
+	case MODE_DI:
+	  return "movq\t{%1, %0|%0, %1}";
+	case MODE_DF:
+	  return "movsd\t{%1, %0|%0, %1}";
+	case MODE_V1DF:
+	  return "movlpd\t{%1, %0|%0, %1}";
+	case MODE_V2SF:
+	  return "movlps\t{%1, %0|%0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else (eq_attr "mode" "V1DF")
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,1,2")
+		 (const_string "DF")
+	       (eq_attr "alternative" "3,4")
+		 (const_string "SI")
+
+	       /* For SSE1, we have many fewer alternatives.  */
+	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		 (cond [(eq_attr "alternative" "5,6")
+			  (const_string "V4SF")
+		       ]
+		   (const_string "V2SF"))
+
+	       /* xorps is one byte shorter.  */
+	       (eq_attr "alternative" "5")
+		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0))
+			  (const_string "V4SF")
+			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    (const_int 0))
+			  (const_string "TI")
+		       ]
+		       (const_string "V2DF"))
+
+	       /* For architectures resolving dependencies on
+		  whole SSE registers use APD move to break dependency
+		  chains, otherwise use short move to avoid extra work.
+
+		  movaps encodes one byte shorter.  */
+	       (eq_attr "alternative" "6")
+		 (cond
+		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		        (const_int 0))
+		      (const_string "V4SF")
+		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		        (const_int 0))
+		      (const_string "V2DF")
+		   ]
+		   (const_string "DF"))
+	       /* For architectures resolving dependencies on register
+		  parts we may avoid extra work to zero out upper part
+		  of register.  */
+	       (eq_attr "alternative" "7")
+		 (if_then_else
+		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+		       (const_int 0))
+		   (const_string "V1DF")
+		   (const_string "DF"))
+	      ]
+	      (const_string "DF")))])
 
 ;; Moving is usually shorter when only FP registers are used. This separate
 ;; movdf pattern avoids the use of integer registers for FP operations
 ;; when optimizing for size.
 
-(define_insn "*movdf_nointeger"
+(define_insn "*movdf_internal_nointeger"
   [(set (match_operand:DF 0 "nonimmediate_operand"
 			"=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
 	(match_operand:DF 1 "general_operand"
@@ -3049,15 +3226,22 @@
     case 3:
     case 4:
       return "#";
+
     case 5:
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
 	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
-	  return "%vxorpd\t%0, %d0";
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vxorps\t%0, %d0";
+	  else
+	    return "%vxorpd\t%0, %d0";
 	case MODE_TI:
-	  return "%vpxor\t%0, %d0";
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vxorps\t%0, %d0";
+	  else
+	    return "%vpxor\t%0, %d0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -3069,9 +3253,15 @@
 	case MODE_V4SF:
 	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return "%vmovapd\t{%1, %0|%0, %1}";
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
+	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
 	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
@@ -3172,268 +3362,6 @@
 	      ]
 	      (const_string "DF")))])
 
-(define_insn "*movdf_integer_rex64"
-  [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
-	(match_operand:DF 1 "general_operand"
-		"fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
-  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && (reload_in_progress || reload_completed
-       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
-           && optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]))
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], DFmode))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3:
-    case 4:
-      return "#";
-
-    case 5:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vxorps\t%0, %d0";
-	case MODE_V2DF:
-	  return "%vxorpd\t%0, %d0";
-	case MODE_TI:
-	  return "%vpxor\t%0, %d0";
-	default:
-	  gcc_unreachable ();
-	}
-    case 6:
-    case 7:
-    case 8:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_V2DF:
-	  return "%vmovapd\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_DI:
-	  return "%vmovq\t{%1, %0|%0, %1}";
-	case MODE_DF:
-	  if (TARGET_AVX)
-	    {
-	      if (REG_P (operands[0]) && REG_P (operands[1]))
-		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
-	      else
-		return "vmovsd\t{%1, %0|%0, %1}";
-	    }
-	  else
-	    return "movsd\t{%1, %0|%0, %1}";
-	case MODE_V1DF:
-	  return "%vmovlpd\t{%1, %d0|%d0, %1}";
-	case MODE_V2SF:
-	  return "%vmovlps\t{%1, %d0|%d0, %1}";
-	default:
-	  gcc_unreachable ();
-	}
-
-    case 9:
-    case 10:
-    return "%vmovd\t{%1, %0|%0, %1}";
-
-    default:
-      gcc_unreachable();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
-   (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
-       (const_string "orig")
-       (const_string "maybe_vex")))
-   (set (attr "prefix_data16")
-     (if_then_else (eq_attr "mode" "V1DF")
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,1,2")
-		 (const_string "DF")
-	       (eq_attr "alternative" "3,4,9,10")
-		 (const_string "DI")
-
-	       /* For SSE1, we have many fewer alternatives.  */
-	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-		 (cond [(eq_attr "alternative" "5,6")
-			  (const_string "V4SF")
-		       ]
-		   (const_string "V2SF"))
-
-	       /* xorps is one byte shorter.  */
-	       (eq_attr "alternative" "5")
-		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-			    (const_int 0))
-			  (const_string "V4SF")
-			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-			    (const_int 0))
-			  (const_string "TI")
-		       ]
-		       (const_string "V2DF"))
-
-	       /* For architectures resolving dependencies on
-		  whole SSE registers use APD move to break dependency
-		  chains, otherwise use short move to avoid extra work.
-
-		  movaps encodes one byte shorter.  */
-	       (eq_attr "alternative" "6")
-		 (cond
-		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-		        (const_int 0))
-		      (const_string "V4SF")
-		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-		        (const_int 0))
-		      (const_string "V2DF")
-		   ]
-		   (const_string "DF"))
-	       /* For architectures resolving dependencies on register
-		  parts we may avoid extra work to zero out upper part
-		  of register.  */
-	       (eq_attr "alternative" "7")
-		 (if_then_else
-		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
-		       (const_int 0))
-		   (const_string "V1DF")
-		   (const_string "DF"))
-	      ]
-	      (const_string "DF")))])
-
-(define_insn "*movdf_integer"
-  [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
-	(match_operand:DF 1 "general_operand"
-		"fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && optimize_function_for_speed_p (cfun)
-   && TARGET_INTEGER_DFMODE_MOVES
-   && (reload_in_progress || reload_completed
-       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
-           && optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]))
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], DFmode))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3:
-    case 4:
-      return "#";
-
-    case 5:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "xorps\t%0, %0";
-	case MODE_V2DF:
-	  return "xorpd\t%0, %0";
-	case MODE_TI:
-	  return "pxor\t%0, %0";
-	default:
-	  gcc_unreachable ();
-	}
-    case 6:
-    case 7:
-    case 8:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
-	case MODE_V2DF:
-	  return "movapd\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
-	case MODE_DI:
-	  return "movq\t{%1, %0|%0, %1}";
-	case MODE_DF:
-	  return "movsd\t{%1, %0|%0, %1}";
-	case MODE_V1DF:
-	  return "movlpd\t{%1, %0|%0, %1}";
-	case MODE_V2SF:
-	  return "movlps\t{%1, %0|%0, %1}";
-	default:
-	  gcc_unreachable ();
-	}
-
-    default:
-      gcc_unreachable();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
-   (set (attr "prefix_data16")
-     (if_then_else (eq_attr "mode" "V1DF")
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,1,2")
-		 (const_string "DF")
-	       (eq_attr "alternative" "3,4")
-		 (const_string "SI")
-
-	       /* For SSE1, we have many fewer alternatives.  */
-	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-		 (cond [(eq_attr "alternative" "5,6")
-			  (const_string "V4SF")
-		       ]
-		   (const_string "V2SF"))
-
-	       /* xorps is one byte shorter.  */
-	       (eq_attr "alternative" "5")
-		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-			    (const_int 0))
-			  (const_string "V4SF")
-			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-			    (const_int 0))
-			  (const_string "TI")
-		       ]
-		       (const_string "V2DF"))
-
-	       /* For architectures resolving dependencies on
-		  whole SSE registers use APD move to break dependency
-		  chains, otherwise use short move to avoid extra work.
-
-		  movaps encodes one byte shorter.  */
-	       (eq_attr "alternative" "6")
-		 (cond
-		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-		        (const_int 0))
-		      (const_string "V4SF")
-		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-		        (const_int 0))
-		      (const_string "V2DF")
-		   ]
-		   (const_string "DF"))
-	       /* For architectures resolving dependencies on register
-		  parts we may avoid extra work to zero out upper part
-		  of register.  */
-	       (eq_attr "alternative" "7")
-		 (if_then_else
-		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
-		       (const_int 0))
-		   (const_string "V1DF")
-		   (const_string "DF"))
-	      ]
-	      (const_string "DF")))])
-
 (define_split
   [(set (match_operand:DF 0 "nonimmediate_operand" "")
 	(match_operand:DF 1 "general_operand" ""))]
@@ -3448,113 +3376,18 @@
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
-(define_insn "*swapdf"
-  [(set (match_operand:DF 0 "fp_register_operand" "+f")
-	(match_operand:DF 1 "fp_register_operand" "+f"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "reload_completed || TARGET_80387"
-{
-  if (STACK_TOP_P (operands[0]))
-    return "fxch\t%1";
-  else
-    return "fxch\t%0";
-}
-  [(set_attr "type" "fxch")
-   (set_attr "mode" "DF")])
-
-(define_expand "movxf"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-	(match_operand:XF 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (XFmode, operands); DONE;")
-
-;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
-;; Size of pushdf using integer instructions is 3+3*memory operand size
-;; Pushing using integer instructions is longer except for constants
-;; and direct memory references.
-;; (assuming that any given constant is pushed only once, but this ought to be
-;;  handled elsewhere).
-
-(define_insn "*pushxf_nointeger"
-  [(set (match_operand:XF 0 "push_operand" "=X,X,X")
-	(match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
-  "optimize_function_for_size_p (cfun)"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*,*")
-   (set_attr "mode" "XF,SI,SI")])
-
-(define_insn "*pushxf_integer"
-  [(set (match_operand:XF 0 "push_operand" "=<,<")
-	(match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
-  "optimize_function_for_speed_p (cfun)"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*")
-   (set_attr "mode" "XF,SI")])
-
-(define_split
-  [(set (match_operand 0 "push_operand" "")
-	(match_operand 1 "general_operand" ""))]
-  "reload_completed
-   && (GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode)
-   && !ANY_FP_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-	(match_operand:XF 1 "any_fp_register_operand" ""))]
-  ""
-  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
-   (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
-
-;; Do not use integer registers when optimizing for size
-(define_insn "*movxf_nointeger"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
-	(match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
-  "optimize_function_for_size_p (cfun)
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+(define_insn "*movsf_internal"
+  [(set (match_operand:SF 0 "nonimmediate_operand"
+	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
+	(match_operand:SF 1 "general_operand"
+	  "fm,f,G,rmF,Fr,C,x,xm,x,m  ,*y,*y ,r  ,Yi,r   ,*Ym"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
-       || standard_80387_constant_p (operands[1])
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], XFmode))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3: case 4:
-      return "#";
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
-   (set_attr "mode" "XF,XF,XF,SI,SI")])
-
-(define_insn "*movxf_integer"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
-	(match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
-  "optimize_function_for_speed_p (cfun)
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && (reload_in_progress || reload_completed
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], XFmode))"
+       || memory_operand (operands[0], SFmode))"
 {
   switch (which_alternative)
     {
@@ -3565,112 +3398,79 @@
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
-    case 3: case 4:
-      return "#";
+    case 3:
+    case 4:
+      return "mov{l}\t{%1, %0|%0, %1}";
+    case 5:
+      if (get_attr_mode (insn) == MODE_TI)
+	return "%vpxor\t%0, %d0";
+      else
+	return "%vxorps\t%0, %d0";
+    case 6:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vmovaps\t{%1, %0|%0, %1}";
+      else
+	return "%vmovss\t{%1, %d0|%d0, %1}";
+    case 7:
+      if (TARGET_AVX)
+	return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+				   : "vmovss\t{%1, %0|%0, %1}";
+      else
+	return "movss\t{%1, %0|%0, %1}";
+    case 8:
+      return "%vmovss\t{%1, %0|%0, %1}";
+
+    case 9: case 10: case 14: case 15:
+      return "movd\t{%1, %0|%0, %1}";
+    case 12: case 13:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
+    case 11:
+      return "movq\t{%1, %0|%0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
-   (set_attr "mode" "XF,XF,XF,SI,SI")])
-
-(define_expand "movtf"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-	(match_operand:TF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE2"
-{
-  ix86_expand_move (TFmode, operands);
-  DONE;
-})
-
-(define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
-	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
-  "TARGET_SSE2
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vmovaps\t{%1, %0|%0, %1}";
-      else
-	return "%vmovdqa\t{%1, %0|%0, %1}";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vxorps\t%0, %d0";
-      else
-	return "%vpxor\t%0, %d0";
-    case 3:
-    case 4:
-	return "#";
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
-   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
+  [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+       (const_string "maybe_vex")
+       (const_string "orig")))
    (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,2")
+        (cond [(eq_attr "alternative" "3,4,9,10")
+		 (const_string "SI")
+	       (eq_attr "alternative" "5")
 		 (if_then_else
-		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
-		       (const_int 0))
-		   (const_string "V4SF")
-		   (const_string "TI"))
-	       (eq_attr "alternative" "1")
+		   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    	 (const_int 0))
+			     (ne (symbol_ref "TARGET_SSE2")
+				 (const_int 0)))
+			(eq (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "TI")
+		   (const_string "V4SF"))
+	       /* For architectures resolving dependencies on
+		  whole SSE registers use APS move to break dependency
+		  chains, otherwise use short move to avoid extra work.
+
+		  Do the same for architectures resolving dependencies on
+		  the parts.  While in DF mode it is better to always handle
+		  just register parts, the SF mode is different due to lack
+		  of instructions to load just part of the register.  It is
+		  better to maintain the whole registers in single format
+		  to avoid problems on using packed logical operations.  */
+	       (eq_attr "alternative" "6")
 		 (if_then_else
-		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 			    (const_int 0))
-			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			(ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
 			    (const_int 0)))
 		   (const_string "V4SF")
-		   (const_string "TI"))]
-	       (const_string "DI")))])
-
-(define_insn "*pushtf_sse"
-  [(set (match_operand:TF 0 "push_operand" "=<,<,<")
-	(match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
-  "TARGET_SSE2"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "sse,*,*")
-   (set_attr "mode" "TF,SI,SI")])
-
-(define_split
-  [(set (match_operand:TF 0 "push_operand" "")
-	(match_operand:TF 1 "general_operand" ""))]
-  "TARGET_SSE2 && reload_completed
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-(define_split
-  [(set (match_operand:TF 0 "push_operand" "")
-	(match_operand:TF 1 "any_fp_register_operand" ""))]
-  "TARGET_SSE2"
-  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
-   (set (mem:TF (reg:P SP_REG)) (match_dup 1))]
-  "")
-
-(define_split
-  [(set (match_operand 0 "nonimmediate_operand" "")
-	(match_operand 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && GET_MODE (operands[0]) == XFmode
-   && ! (ANY_FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (ANY_FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
+		   (const_string "SF"))
+	       (eq_attr "alternative" "11")
+		 (const_string "DI")]
+	       (const_string "SF")))])
 
 (define_split
   [(set (match_operand 0 "register_operand" "")
@@ -3679,8 +3479,8 @@
    && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == SFmode
-       || GET_MODE (operands[0]) == DFmode)
+       || GET_MODE (operands[0]) == DFmode
+       || GET_MODE (operands[0]) == SFmode)
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
@@ -3711,8 +3511,8 @@
    && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == SFmode
-       || GET_MODE (operands[0]) == DFmode)
+       || GET_MODE (operands[0]) == DFmode
+       || GET_MODE (operands[0]) == SFmode)
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
@@ -3736,21 +3536,6 @@
     FAIL;
 })
 
-(define_insn "swapxf"
-  [(set (match_operand:XF 0 "register_operand" "+f")
-	(match_operand:XF 1 "register_operand" "+f"))
-   (set (match_dup 1)
-	(match_dup 0))]
-  "TARGET_80387"
-{
-  if (STACK_TOP_P (operands[0]))
-    return "fxch\t%1";
-  else
-    return "fxch\t%0";
-}
-  [(set_attr "type" "fxch")
-   (set_attr "mode" "XF")])
-
 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
 (define_split
   [(set (match_operand:X87MODEF 0 "register_operand" "")
@@ -3771,221 +3556,76 @@
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed
-   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
+(define_insn "swapxf"
+  [(set (match_operand:XF 0 "register_operand" "+f")
+	(match_operand:XF 1 "register_operand" "+f"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_80387"
+{
+  if (STACK_TOP_P (operands[0]))
+    return "fxch\t%1";
+  else
+    return "fxch\t%0";
+}
+  [(set_attr "type" "fxch")
+   (set_attr "mode" "XF")])
+
+(define_insn "*swap<mode>"
+  [(set (match_operand:MODEF 0 "fp_register_operand" "+f")
+	(match_operand:MODEF 1 "fp_register_operand" "+f"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_80387 || reload_completed"
+{
+  if (STACK_TOP_P (operands[0]))
+    return "fxch\t%1";
+  else
+    return "fxch\t%0";
+}
+  [(set_attr "type" "fxch")
+   (set_attr "mode" "<MODE>")])
 
 ;; Zero extension instructions
 
-(define_expand "zero_extendhisi2"
-  [(set (match_operand:SI 0 "register_operand" "")
-     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
-  ""
-{
-  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
-    {
-      operands[1] = force_reg (HImode, operands[1]);
-      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_insn "zero_extendhisi2_and"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-     (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
-  "#"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "SI")])
-
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:HI 1 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed && TARGET_ZERO_EXTEND_WITH_AND
-   && optimize_function_for_speed_p (cfun)"
-  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_insn "*zero_extendhisi2_movzwl"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
-  "!TARGET_ZERO_EXTEND_WITH_AND
-   || optimize_function_for_size_p (cfun)"
-  "movz{wl|x}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-(define_expand "zero_extendqihi2"
-  [(parallel
-    [(set (match_operand:HI 0 "register_operand" "")
-       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
-     (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
-
-(define_insn "*zero_extendqihi2_and"
-  [(set (match_operand:HI 0 "register_operand" "=r,?&q")
-     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
-  "#"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "HI")])
-
-(define_insn "*zero_extendqihi2_movzbw_and"
-  [(set (match_operand:HI 0 "register_operand" "=r,r")
-     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
-  "#"
-  [(set_attr "type" "imovx,alu1")
-   (set_attr "mode" "HI")])
-
-; zero extend to SImode here to avoid partial register stalls
-(define_insn "*zero_extendqihi2_movzbl"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
-   && reload_completed"
-  "movz{bl|x}\t{%1, %k0|%k0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-;; For the movzbw case strip only the clobber
-(define_split
-  [(set (match_operand:HI 0 "register_operand" "")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND
-       || optimize_function_for_size_p (cfun))
-   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
-  [(set (match_operand:HI 0 "register_operand" "")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
-
-;; When source and destination does not overlap, clear destination
-;; first and then do the movb
-(define_split
-  [(set (match_operand:HI 0 "register_operand" "")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ANY_QI_REG_P (operands[0])
-   && (TARGET_ZERO_EXTEND_WITH_AND
-       && optimize_function_for_speed_p (cfun))
-   && !reg_overlap_mentioned_p (operands[0], operands[1])"
-  [(set (strict_low_part (match_dup 2)) (match_dup 1))]
-{
-  operands[2] = gen_lowpart (QImode, operands[0]);
-  ix86_expand_clear (operands[0]);
-})
-
-;; Rest is handled by single and.
-(define_split
-  [(set (match_operand:HI 0 "register_operand" "")
-	(zero_extend:HI (match_operand:QI 1 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) == true_regnum (operands[1])"
-  [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_expand "zero_extendqisi2"
-  [(parallel
-    [(set (match_operand:SI 0 "register_operand" "")
-       (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
-     (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
-
-(define_insn "*zero_extendqisi2_and"
-  [(set (match_operand:SI 0 "register_operand" "=r,?&q")
-     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
-  "#"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "SI")])
-
-(define_insn "*zero_extendqisi2_movzbl_and"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
-  "#"
-  [(set_attr "type" "imovx,alu1")
-   (set_attr "mode" "SI")])
-
-(define_insn "*zero_extendqisi2_movzbl"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
-   && reload_completed"
-  "movz{bl|x}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
-
-;; For the movzbl case strip only the clobber
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
-   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
-  [(set (match_dup 0)
-	(zero_extend:SI (match_dup 1)))])
-
-;; When source and destination does not overlap, clear destination
-;; first and then do the movb
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ANY_QI_REG_P (operands[0])
-   && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
-   && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
-   && !reg_overlap_mentioned_p (operands[0], operands[1])"
-  [(set (strict_low_part (match_dup 2)) (match_dup 1))]
-{
-  operands[2] = gen_lowpart (QImode, operands[0]);
-  ix86_expand_clear (operands[0]);
-})
-
-;; Rest is handled by single and.
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:QI 1 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) == true_regnum (operands[1])"
-  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-;; %%% Kill me once multi-word ops are sane.
 (define_expand "zero_extendsidi2"
-  [(set (match_operand:DI 0 "register_operand" "")
-     (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
   ""
 {
   if (!TARGET_64BIT)
     {
-      emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
+      emit_insn (gen_zero_extendsidi2_1 (operands[0], operands[1]));
       DONE;
     }
 })
 
-(define_insn "zero_extendsidi2_32"
+(define_insn "*zero_extendsidi2_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand"  "=r,o,?*Ym,?*y,?*Yi,*Y2")
+	(zero_extend:DI
+	 (match_operand:SI 1 "nonimmediate_operand" "rm,0,r   ,m  ,r   ,m")))]
+  "TARGET_64BIT"
+  "@
+   mov\t{%k1, %k0|%k0, %k1}
+   #
+   movd\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+   (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
+   (set_attr "prefix_0f" "0,*,*,*,*,*")
+   (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+     	(zero_extend:DI (match_dup 0)))]
+  "TARGET_64BIT"
+  [(set (match_dup 4) (const_int 0))]
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+;; %%% Kill me once multi-word ops are sane.
+(define_insn "zero_extendsidi2_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2")
 	(zero_extend:DI
 	 (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m  ,r   ,m")))
@@ -4003,30 +3643,6 @@
    (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
    (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
 
-(define_insn "zero_extendsidi2_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
-     (zero_extend:DI
-       (match_operand:SI 1 "nonimmediate_operand"  "rm,0,r   ,m  ,r   ,m")))]
-  "TARGET_64BIT"
-  "@
-   mov\t{%k1, %k0|%k0, %k1}
-   #
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}
-   %vmovd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
-   (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
-   (set_attr "prefix_0f" "0,*,*,*,*,*")
-   (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
-
-(define_split
-  [(set (match_operand:DI 0 "memory_operand" "")
-     (zero_extend:DI (match_dup 0)))]
-  "TARGET_64BIT"
-  [(set (match_dup 4) (const_int 0))]
-  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
-
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
 	(zero_extend:DI (match_operand:SI 1 "register_operand" "")))
@@ -4034,59 +3650,149 @@
   "!TARGET_64BIT && reload_completed
    && true_regnum (operands[0]) == true_regnum (operands[1])"
   [(set (match_dup 4) (const_int 0))]
-  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
 	(zero_extend:DI (match_operand:SI 1 "general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && reload_completed
-   && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])"
+   && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))"
   [(set (match_dup 3) (match_dup 1))
    (set (match_dup 4) (const_int 0))]
-  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
-
-(define_insn "zero_extendhidi2"
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+(define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "register_operand" "=r")
-     (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+	(zero_extend:DI
+	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
   "TARGET_64BIT"
-  "movz{wl|x}\t{%1, %k0|%k0, %1}"
+  "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_expand "zero_extendhisi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  ""
+{
+  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+    {
+      operands[1] = force_reg (HImode, operands[1]);
+      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn_and_split "zero_extendhisi2_and"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendhisi2_movzwl"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "!TARGET_ZERO_EXTEND_WITH_AND
+   || optimize_function_for_size_p (cfun)"
+  "movz{wl|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
 
-(define_insn "zero_extendqidi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-     (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))]
-  "TARGET_64BIT"
+(define_expand "zero_extendqi<mode>2"
+  [(parallel
+    [(set (match_operand:SWI24 0 "register_operand" "")
+	  (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*zero_extendqi<mode>2_and"
+  [(set (match_operand:SWI24 0 "register_operand" "=r,?&q")
+	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")])
+
+;; When source and destination does not overlap, clear destination
+;; first and then do the movb
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand" "")
+	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+   && ANY_QI_REG_P (operands[0])
+   && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(set (strict_low_part (match_dup 2)) (match_dup 1))]
+{
+  operands[2] = gen_lowpart (QImode, operands[0]);
+  ix86_expand_clear (operands[0]);
+})
+
+(define_insn "*zero_extendqi<mode>2_movzbl_and"
+  [(set (match_operand:SWI24 0 "register_operand" "=r,r")
+	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+  "#"
+  [(set_attr "type" "imovx,alu1")
+   (set_attr "mode" "<MODE>")])
+
+;; For the movzbl case strip only the clobber
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand" "")
+	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+  [(set (match_dup 0)
+	(zero_extend:SWI24 (match_dup 1)))])
+
+; zero extend to SImode to avoid partial register stalls
+(define_insn "*zero_extendqi<mode>2_movzbl"
+  [(set (match_operand:SWI24 0 "register_operand" "=r")
+	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "reload_completed
+   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
   "movz{bl|x}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
+
+;; Rest is handled by single and.
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand" "")
+	(zero_extend:SWI24 (match_operand:QI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) == true_regnum (operands[1])"
+  [(parallel [(set (match_dup 0) (and:SWI24 (match_dup 0) (const_int 255)))
+	      (clobber (reg:CC FLAGS_REG))])])
 
 ;; Sign extension instructions
 
 (define_expand "extendsidi2"
-  [(parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (match_scratch:SI 2 ""))])]
-  ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1]));
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))]
+  ""
+{
+  if (!TARGET_64BIT)
+    {
+      emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
       DONE;
     }
 })
 
-(define_insn "*extendsidi2_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
-	(sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
-   (clobber (reg:CC FLAGS_REG))
-   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
-  "!TARGET_64BIT"
-  "#")
-
-(define_insn "extendsidi2_rex64"
+(define_insn "*extendsidi2_rex64"
   [(set (match_operand:DI 0 "register_operand" "=*a,r")
 	(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
   "TARGET_64BIT"
@@ -4098,21 +3804,13 @@
    (set_attr "prefix_0f" "0")
    (set_attr "modrm" "0,1")])
 
-(define_insn "extendhidi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
-  "TARGET_64BIT"
-  "movs{wq|x}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "DI")])
-
-(define_insn "extendqidi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "TARGET_64BIT"
-  "movs{bq|x}\t{%1, %0|%0, %1}"
-   [(set_attr "type" "imovx")
-    (set_attr "mode" "DI")])
+(define_insn "extendsidi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+  "!TARGET_64BIT"
+  "#")
 
 ;; Extend to memory case when source register does die.
 (define_split
@@ -4127,7 +3825,7 @@
    (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
 	      (clobber (reg:CC FLAGS_REG))])
    (set (match_dup 4) (match_dup 1))]
-  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
 ;; Extend to memory case when source register does not die.
 (define_split
@@ -4138,7 +3836,7 @@
   "reload_completed"
   [(const_int 0)]
 {
-  split_di (&operands[0], 1, &operands[3], &operands[4]);
+  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
 
   emit_move_insn (operands[3], operands[1]);
 
@@ -4147,12 +3845,12 @@
       && true_regnum (operands[1]) == AX_REG
       && true_regnum (operands[2]) == DX_REG)
     {
-      emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
+      emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
     }
   else
     {
       emit_move_insn (operands[2], operands[1]);
-      emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31)));
+      emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
     }
   emit_move_insn (operands[4], operands[2]);
   DONE;
@@ -4168,25 +3866,35 @@
   "reload_completed"
   [(const_int 0)]
 {
-  split_di (&operands[0], 1, &operands[3], &operands[4]);
+  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
 
   if (true_regnum (operands[3]) != true_regnum (operands[1]))
     emit_move_insn (operands[3], operands[1]);
 
   /* Generate a cltd if possible and doing so it profitable.  */
   if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
-      && true_regnum (operands[3]) == AX_REG)
-    {
-      emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
+      && true_regnum (operands[3]) == AX_REG
+      && true_regnum (operands[4]) == DX_REG)
+    {
+      emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
       DONE;
     }
 
   if (true_regnum (operands[4]) != true_regnum (operands[1]))
     emit_move_insn (operands[4], operands[1]);
 
-  emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31)));
-  DONE;
-})
+  emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
+  DONE;
+})
+
+(define_insn "extend<mode>di2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI
+	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+  "TARGET_64BIT"
+  "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
 
 (define_insn "extendhisi2"
   [(set (match_operand:SI 0 "register_operand" "=*a,r")
@@ -4217,7 +3925,8 @@
 (define_insn "*extendhisi2_zext"
   [(set (match_operand:DI 0 "register_operand" "=*a,r")
 	(zero_extend:DI
-	  (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+	 (sign_extend:SI
+	  (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
   "TARGET_64BIT"
 {
   switch (get_attr_prefix_0f (insn))
@@ -4241,32 +3950,6 @@
 	(const_string "0")
 	(const_string "1")))])
 
-(define_insn "extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=*a,r")
-	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
-  ""
-{
-  switch (get_attr_prefix_0f (insn))
-    {
-    case 0:
-      return "{cbtw|cbw}";
-    default:
-      return "movs{bw|x}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "HI")
-   (set (attr "prefix_0f")
-     ;; movsx is short decodable while cwtl is vector decoded.
-     (if_then_else (and (eq_attr "cpu" "!k6")
-			(eq_attr "alternative" "0"))
-	(const_string "0")
-	(const_string "1")))
-   (set (attr "modrm")
-     (if_then_else (eq_attr "prefix_0f" "0")
-	(const_string "0")
-	(const_string "1")))])
-
 (define_insn "extendqisi2"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
@@ -4283,47 +3966,53 @@
   "movs{bl|x}\t{%1, %k0|%k0, %1}"
    [(set_attr "type" "imovx")
     (set_attr "mode" "SI")])
+
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand" "=*a,r")
+	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+  ""
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cbtw|cbw}";
+    default:
+      return "movs{bw|x}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "HI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+			(eq_attr "alternative" "0"))
+	(const_string "0")
+	(const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+	(const_string "0")
+	(const_string "1")))])
 
 ;; Conversions between float and double.
 
-;; These are all no-ops in the model used for the 80387.  So just
-;; emit moves.
+;; These are all no-ops in the model used for the 80387.
+;; So just emit moves.
 
 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
-(define_insn "*dummy_extendsfdf2"
-  [(set (match_operand:DF 0 "push_operand" "=<")
-	(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))]
-  "0"
-  "#")
-
 (define_split
   [(set (match_operand:DF 0 "push_operand" "")
 	(float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
-  ""
+  "reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
    (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
 
-(define_insn "*dummy_extendsfxf2"
-  [(set (match_operand:XF 0 "push_operand" "=<")
-	(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))]
-  "0"
-  "#")
-
 (define_split
   [(set (match_operand:XF 0 "push_operand" "")
-	(float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
-  ""
+	(float_extend:XF (match_operand:MODEF 1 "fp_register_operand" "")))]
+  "reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
    (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
-
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-	(float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
-  ""
-  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
-   (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
 
 (define_expand "extendsfdf2"
   [(set (match_operand:DF 0 "nonimmediate_operand" "")
@@ -4538,8 +4227,7 @@
 (define_expand "truncdfsf2_with_temp"
   [(parallel [(set (match_operand:SF 0 "" "")
 		   (float_truncate:SF (match_operand:DF 1 "" "")))
-	      (clobber (match_operand:SF 2 "" ""))])]
-  "")
+	      (clobber (match_operand:SF 2 "" ""))])])
 
 (define_insn "*truncdfsf_fast_mixed"
   [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm,x")
@@ -4644,9 +4332,7 @@
   "reload_completed"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
-{
-  operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
-})
+  "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));")
 
 ;; Conversion from XFmode to {SF,DF}mode
 
@@ -4667,9 +4353,9 @@
     }
   else
     {
-     enum ix86_stack_slot slot = (virtuals_instantiated
-				  ? SLOT_TEMP
-				  : SLOT_VIRTUAL);
+      enum ix86_stack_slot slot = (virtuals_instantiated
+				   ? SLOT_TEMP
+				   : SLOT_VIRTUAL);
       operands[2] = assign_386_stack_local (<MODE>mode, slot);
     }
 })
@@ -4727,8 +4413,7 @@
    (clobber (match_operand:MODEF 2 "memory_operand" ""))]
   "TARGET_80387 && reload_completed"
   [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 (define_split
   [(set (match_operand:MODEF 0 "memory_operand" "")
@@ -4736,8 +4421,7 @@
 	  (match_operand:XF 1 "register_operand" "")))
    (clobber (match_operand:MODEF 2 "memory_operand" ""))]
   "TARGET_80387"
-  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))])
 
 ;; Signed conversion to DImode.
 
@@ -4851,7 +4535,7 @@
 
   real_ldexp (&TWO31r, &dconst1, 31);
   two31 = const_double_from_real_value (TWO31r, mode);
-  two31 = ix86_build_const_vector (mode, true, two31);
+  two31 = ix86_build_const_vector (vecmode, true, two31);
   operands[2] = force_reg (vecmode, two31);
 })
 
@@ -4896,7 +4580,8 @@
    (set_attr "prefix_rex" "1")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")])
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "bdver1_decode" "double,double")])
 
 (define_insn "fix_trunc<mode>si_sse"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4908,7 +4593,8 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")])
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "bdver1_decode" "double,double")])
 
 ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
 (define_peephole2
@@ -4917,9 +4603,9 @@
    (set (match_operand:SSEMODEI24 2 "register_operand" "")
 	(fix:SSEMODEI24 (match_dup 0)))]
   "TARGET_SHORTEN_X87_SSE
+   && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ())
    && peep2_reg_dead_p (2, operands[0])"
-  [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
-  "")
+  [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))])
 
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
@@ -4928,8 +4614,7 @@
 	(fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
   "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
-  "")
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))])
 
 (define_peephole2
   [(match_scratch:SF 2 "x")
@@ -4937,8 +4622,7 @@
 	(fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
   "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
-  "")
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))])
 
 (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
   [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
@@ -5002,8 +4686,7 @@
   "reload_completed"
   [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
 	      (clobber (match_dup 3))])
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 (define_split
   [(set (match_operand:X87MODEI 0 "memory_operand" "")
@@ -5012,8 +4695,7 @@
    (clobber (match_scratch 3 ""))]
   "reload_completed"
   [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
-	      (clobber (match_dup 3))])]
-  "")
+	      (clobber (match_dup 3))])])
 
 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
 ;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control
@@ -5094,8 +4776,7 @@
 	      (use (match_dup 2))
 	      (use (match_dup 3))
 	      (clobber (match_dup 5))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
@@ -5108,8 +4789,7 @@
   [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
 	      (use (match_dup 2))
 	      (use (match_dup 3))
-	      (clobber (match_dup 5))])]
-  "")
+	      (clobber (match_dup 5))])])
 
 (define_insn "fix_trunc<mode>_i387"
   [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
@@ -5148,8 +4828,7 @@
   [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
 	      (use (match_dup 2))
 	      (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:X87MODEI12 0 "memory_operand" "")
@@ -5160,28 +4839,31 @@
   "reload_completed"
   [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
 	      (use (match_dup 2))
-	      (use (match_dup 3))])]
-  "")
+	      (use (match_dup 3))])])
 
 (define_insn "x86_fnstcw_1"
   [(set (match_operand:HI 0 "memory_operand" "=m")
 	(unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
   "TARGET_80387"
   "fnstcw\t%0"
-  [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+  [(set (attr "length")
+	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
    (set_attr "mode" "HI")
-   (set_attr "unit" "i387")])
+   (set_attr "unit" "i387")
+   (set_attr "bdver1_decode" "vector")])
 
 (define_insn "x86_fldcw_1"
   [(set (reg:HI FPCR_REG)
 	(unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
   "TARGET_80387"
   "fldcw\t%0"
-  [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+  [(set (attr "length")
+	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
    (set_attr "mode" "HI")
    (set_attr "unit" "i387")
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")])
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
 
 ;; Conversion between fixed point and floating point.
 
@@ -5193,8 +4875,7 @@
 	(float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
   "TARGET_80387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)"
-  "")
+       || TARGET_MIX_SSE_I387)")
 
 ;; Pre-reload splitter to add memory clobber to the pattern.
 (define_insn_and_split "*floathi<mode>2_1"
@@ -5244,8 +4925,7 @@
        || TARGET_MIX_SSE_I387)
    && reload_completed"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
-  "")
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))])
 
 (define_split
   [(set (match_operand:X87MODEF 0 "register_operand" "")
@@ -5255,8 +4935,7 @@
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
         || TARGET_MIX_SSE_I387)
     && reload_completed"
-  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 (define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
   [(set (match_operand:X87MODEF 0 "register_operand" "")
@@ -5338,6 +5017,7 @@
    (set_attr "unit" "*,i387,*,*,*")
    (set_attr "athlon_decode" "*,*,double,direct,double")
    (set_attr "amdfam10_decode" "*,*,vector,double,double")
+   (set_attr "bdver1_decode" "*,*,double,direct,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsi<mode>2_vector_mixed"
@@ -5353,6 +5033,7 @@
    (set_attr "unit" "i387,*")
    (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
+   (set_attr "bdver1_decode" "*,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
@@ -5368,6 +5049,7 @@
    (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,double,direct")
    (set_attr "amdfam10_decode" "*,*,vector,double")
+   (set_attr "bdver1_decode" "*,*,double,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_split
@@ -5381,8 +5063,7 @@
    && (SSE_REG_P (operands[0])
        || (GET_CODE (operands[0]) == SUBREG
 	   && SSE_REG_P (operands[0])))"
-  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_split
   [(set (match_operand:MODEF 0 "register_operand" "")
@@ -5396,8 +5077,7 @@
        || (GET_CODE (operands[0]) == SUBREG
 	   && SSE_REG_P (operands[0])))"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (float:MODEF (match_dup 2)))]
-  "")
+   (set (match_dup 0) (float:MODEF (match_dup 2)))])
 
 (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
   [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
@@ -5422,6 +5102,7 @@
    (set_attr "unit" "i387,*,*")
    (set_attr "athlon_decode" "*,double,direct")
    (set_attr "amdfam10_decode" "*,vector,double")
+   (set_attr "bdver1_decode" "*,double,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
@@ -5445,6 +5126,7 @@
        (const_string "*")))
    (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
+   (set_attr "bdver1_decode" "*,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsi<mode>2_vector_sse_with_temp"
@@ -5459,6 +5141,7 @@
    (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
    (set_attr "athlon_decode" "double,direct,double")
    (set_attr "amdfam10_decode" "vector,double,double")
+   (set_attr "bdver1_decode" "double,direct,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsi<mode>2_vector_sse"
@@ -5471,6 +5154,7 @@
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "fp_int_src" "true")])
 
 (define_split
@@ -5606,6 +5290,7 @@
    (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,direct")
    (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "bdver1_decode" "double,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
@@ -5627,6 +5312,7 @@
        (const_string "*")))
    (set_attr "athlon_decode" "double,direct")
    (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "bdver1_decode" "double,direct")
    (set_attr "fp_int_src" "true")])
 
 (define_split
@@ -5640,8 +5326,7 @@
    && (SSE_REG_P (operands[0])
        || (GET_CODE (operands[0]) == SUBREG
 	   && SSE_REG_P (operands[0])))"
-  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
@@ -5662,6 +5347,7 @@
        (const_string "*")))
    (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "fp_int_src" "true")])
 
 (define_split
@@ -5676,8 +5362,7 @@
        || (GET_CODE (operands[0]) == SUBREG
 	   && SSE_REG_P (operands[0])))"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (float:MODEF (match_dup 2)))]
-  "")
+   (set (match_dup 0) (float:MODEF (match_dup 2)))])
 
 (define_split
   [(set (match_operand:MODEF 0 "register_operand" "")
@@ -5689,8 +5374,7 @@
    && (SSE_REG_P (operands[0])
        || (GET_CODE (operands[0]) == SUBREG
 	   && SSE_REG_P (operands[0])))"
-  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))])
 
 (define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
   [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
@@ -5727,8 +5411,7 @@
    && reload_completed
    && FP_REG_P (operands[0])"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
-  "")
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))])
 
 (define_split
   [(set (match_operand:X87MODEF 0 "register_operand" "")
@@ -5738,8 +5421,7 @@
    && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
    && reload_completed
    && FP_REG_P (operands[0])"
-  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 ;; Avoid store forwarding (partial memory) stall penalty
 ;; by passing DImode value through XMM registers.  */
@@ -5797,8 +5479,7 @@
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
    && reload_completed
    && FP_REG_P (operands[0])"
-  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
-  "")
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two
@@ -5927,7 +5608,7 @@
 		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
 		       (match_dup 5))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);")
+  "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
 
 (define_insn "*add<mode>3_cc"
   [(set (reg:CC FLAGS_REG)
@@ -5956,8 +5637,8 @@
    (set_attr "mode" "QI")])
 
 (define_insn "*lea_1"
-  [(set (match_operand:DWIH 0 "register_operand" "=r")
-	(match_operand:DWIH 1 "no_seg_address_operand" "p"))]
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(match_operand:P 1 "no_seg_address_operand" "p"))]
   ""
   "lea{<imodesuffix>}\t{%a1, %0|%0, %a1}"
   [(set_attr "type" "lea")
@@ -5991,8 +5672,7 @@
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
-      return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
+      return "#";
 
     case TYPE_INCDEC:
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
@@ -6005,38 +5685,24 @@
 	}
 
     default:
-      /* Use add as much as possible to replace lea for AGU optimization. */
-      if (which_alternative == 2 && TARGET_OPT_AGU)
-        return "add{<imodesuffix>}\t{%1, %0|%0, %1}";
+      /* For most processors, ADD is faster than LEA.  This alternative
+	 was added to use ADD as much as possible.  */
+      if (which_alternative == 2)
+	{
+	  rtx tmp;
+	  tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+	}
         
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
-
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-	  /* Avoid overflows.  */
-	  && (<MODE>mode != DImode
-	      || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
-     (cond [(and (eq_attr "alternative" "2") 
-                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
-	      (const_string "lea")
-            (eq_attr "alternative" "3")
+     (cond [(eq_attr "alternative" "3")
               (const_string "lea")
-	    ; Current assemblers are broken and do not allow @GOTOFF in
-	    ; ought but a memory context.
-	    (match_operand:SWI48 2 "pic_symbolic_operand" "")
-	      (const_string "lea")
 	    (match_operand:SWI48 2 "incdec_operand" "")
 	      (const_string "incdec")
 	   ]
@@ -6055,18 +5721,17 @@
 ;; patterns constructed from addsi_1 to match.
 
 (define_insn "*addsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-		   (match_operand:SI 2 "general_operand" "g,li"))))
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+		   (match_operand:SI 2 "general_operand" "g,0,li"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
-      return "lea{l}\t{%a2, %k0|%k0, %a2}";
+      return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
@@ -6078,25 +5743,22 @@
 	}
 
     default:
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{l}\t{%2, %k0|%k0, %2}";
-        }
+      /* For most processors, ADD is faster than LEA.  This alternative
+	 was added to use ADD as much as possible.  */
+      if (which_alternative == 1)
+	{
+	  rtx tmp;
+	  tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+	}
+
+      if (x86_maybe_negate_const_int (&operands[2], SImode))
+        return "sub{l}\t{%2, %k0|%k0, %2}";
+
       return "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "1")
-	      (const_string "lea")
-	    ; Current assemblers are broken and do not allow @GOTOFF in
-	    ; ought but a memory context.
-	    (match_operand:SI 2 "pic_symbolic_operand" "")
+     (cond [(eq_attr "alternative" "2")
 	      (const_string "lea")
 	    (match_operand:SI 2 "incdec_operand" "")
 	      (const_string "incdec")
@@ -6129,16 +5791,9 @@
 	}
 
     default:
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{w}\t{%2, %0|%0, %2}";
-	}
+      if (x86_maybe_negate_const_int (&operands[2], HImode))
+	return "sub{w}\t{%2, %0|%0, %2}";
+
       return "add{w}\t{%2, %0|%0, %2}";
     }
 }
@@ -6153,14 +5808,10 @@
 	(const_string "*")))
    (set_attr "mode" "HI")])
 
-;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah
-;; type optimizations enabled by define-splits.  This is not important
-;; for PII, and in fact harmful because of partial register stalls.
-
 (define_insn "*addhi_1_lea"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
-	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
-		 (match_operand:HI 2 "general_operand" "rn,rm,ln")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
+	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,r")
+		 (match_operand:HI 2 "general_operand" "rmn,rn,0,ln")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL
    && ix86_binary_operator_ok (PLUS, HImode, operands)"
@@ -6169,7 +5820,9 @@
     {
     case TYPE_LEA:
       return "#";
+
     case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
 	return "inc{w}\t%0";
       else
@@ -6179,32 +5832,36 @@
 	}
 
     default:
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
+      /* For most processors, ADD is faster than LEA.  This alternative
+	 was added to use ADD as much as possible.  */
+      if (which_alternative == 2)
 	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{w}\t{%2, %0|%0, %2}";
+	  rtx tmp;
+	  tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
 	}
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], HImode))
+	return "sub{w}\t{%2, %0|%0, %2}";
+
       return "add{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
-     (if_then_else (eq_attr "alternative" "2")
-	(const_string "lea")
-	(if_then_else (match_operand:HI 2 "incdec_operand" "")
-	   (const_string "incdec")
-	   (const_string "alu"))))
+     (cond [(eq_attr "alternative" "3")
+              (const_string "lea")
+	    (match_operand:HI 2 "incdec_operand" "")
+	      (const_string "incdec")
+	   ]
+	   (const_string "alu")))
    (set (attr "length_immediate")
       (if_then_else
 	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
 	(const_string "1")
 	(const_string "*")))
-   (set_attr "mode" "HI,HI,SI")])
-
+   (set_attr "mode" "HI,HI,HI,SI")])
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
 (define_insn "*addqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
 	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
@@ -6226,14 +5883,8 @@
 	}
 
     default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
+      if (x86_maybe_negate_const_int (&operands[2], QImode))
 	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
 	  if (widen)
 	    return "sub{l}\t{%2, %k0|%k0, %2}";
 	  else
@@ -6256,21 +5907,24 @@
 	(const_string "*")))
    (set_attr "mode" "QI,QI,SI")])
 
-;; %%% Potential partial reg stall on alternative 2.  What to do?
+;; %%% Potential partial reg stall on alternatives 3 and 4.  What to do?
 (define_insn "*addqi_1_lea"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
-	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
-		 (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,q,r,r,r")
+	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,r")
+		 (match_operand:QI 2 "general_operand" "qmn,qn,0,rn,0,ln")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL
    && ix86_binary_operator_ok (PLUS, QImode, operands)"
 {
-  int widen = (which_alternative == 2);
+  int widen = (which_alternative == 3 || which_alternative == 4);
+
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
       return "#";
+
     case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
 	return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
       else
@@ -6280,14 +5934,17 @@
 	}
 
     default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
+      /* For most processors, ADD is faster than LEA.  These alternatives
+	 were added to use ADD as much as possible.  */
+      if (which_alternative == 2 || which_alternative == 4)
 	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  rtx tmp;
+	  tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+	}
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], QImode))
+	{
 	  if (widen)
 	    return "sub{l}\t{%2, %k0|%k0, %2}";
 	  else
@@ -6300,17 +5957,18 @@
     }
 }
   [(set (attr "type")
-     (if_then_else (eq_attr "alternative" "3")
-	(const_string "lea")
-	(if_then_else (match_operand:QI 2 "incdec_operand" "")
-	   (const_string "incdec")
-	   (const_string "alu"))))
+     (cond [(eq_attr "alternative" "5")
+              (const_string "lea")
+	    (match_operand:QI 2 "incdec_operand" "")
+	      (const_string "incdec")
+	   ]
+	   (const_string "alu")))
    (set (attr "length_immediate")
       (if_then_else
 	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
 	(const_string "1")
 	(const_string "*")))
-   (set_attr "mode" "QI,QI,SI,SI")])
+   (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
 
 (define_insn "*addqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -6332,13 +5990,9 @@
 	}
 
     default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.  */
-      if (CONST_INT_P (operands[1])
-	  && INTVAL (operands[1]) < 0)
-	{
-	  operands[1] = GEN_INT (-INTVAL (operands[1]));
-	  return "sub{b}\t{%1, %0|%0, %1}";
-	}
+      if (x86_maybe_negate_const_int (&operands[1], QImode))
+	return "sub{b}\t{%1, %0|%0, %1}";
+
       return "add{b}\t{%1, %0|%0, %1}";
     }
 }
@@ -6352,25 +6006,82 @@
         (const_string "none")))
    (set_attr "mode" "QI")])
 
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(plus (match_operand 1 "register_operand" "")
+              (match_operand 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed && ix86_lea_for_add_ok (insn, operands)" 
+  [(const_int 0)]
+{
+  rtx pat;
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
+     may confuse gen_lowpart.  */
+  if (mode != Pmode)
+    {
+      operands[1] = gen_lowpart (Pmode, operands[1]);
+      operands[2] = gen_lowpart (Pmode, operands[2]);
+    }
+
+  pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
+
+  if (TARGET_64BIT && mode != Pmode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+;; ??? This pattern handles immediate operands that do not satisfy immediate
+;; operand predicate (LEGITIMATE_CONSTANT_P) in the previous pattern.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(plus:DI (match_operand:DI 1 "register_operand" "")
+		 (match_operand:DI 2 "x86_64_immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed 
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(plus:DI (match_dup 1) (match_dup 2)))])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "register_operand" "")
+		   (match_operand:SI 2 "nonmemory_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && ix86_lea_for_add_ok (insn, operands)"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+  operands[1] = gen_lowpart (DImode, operands[1]);
+  operands[2] = gen_lowpart (DImode, operands[2]);
+})
+
 (define_insn "*add<mode>_2"
   [(set (reg FLAGS_REG)
 	(compare
-	  (plus:SWI48
-	    (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
-	    (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>"))
+	  (plus:SWI
+	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+	    (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
 	  (const_int 0)))
-   (set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
-	(plus:SWI48 (match_dup 1) (match_dup 2)))]
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+	(plus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
-   /* Current assemblers are broken and do not allow @GOTOFF in
-      ought but a memory context.  */
-   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{<imodesuffix>}\t%0";
       else
@@ -6380,27 +6091,14 @@
 	}
 
     default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* ???? In DImode, we ought to handle there the 32bit case too
-	 - do we need new constraint?  */
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-	  /* Avoid overflows.  */
-	  && (<MODE>mode != DImode
-	      || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
-     (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
+     (if_then_else (match_operand:SWI 2 "incdec_operand" "")
 	(const_string "incdec")
 	(const_string "alu")))
    (set (attr "length_immediate")
@@ -6420,10 +6118,7 @@
    (set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, SImode, operands)
-   /* Current assemblers are broken and do not allow @GOTOFF in
-      ought but a memory context.  */
-   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+   && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
@@ -6437,16 +6132,9 @@
 	}
 
     default:
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{l}\t{%2, %k0|%k0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], SImode))
+        return "sub{l}\t{%2, %k0|%k0, %2}";
+
       return "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
@@ -6461,111 +6149,19 @@
 	(const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*addhi_2"
+(define_insn "*add<mode>_3"
   [(set (reg FLAGS_REG)
 	(compare
-	  (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-		   (match_operand:HI 2 "general_operand" "rmn,rn"))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
-	(plus:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, HImode, operands)"
+	  (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>"))
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0")))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_match_ccmode (insn, CCZmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-	return "inc{w}\t%0";
-      else
-        {
-	  gcc_assert (operands[2] == constm1_rtx);
-	  return "dec{w}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{w}\t{%2, %0|%0, %2}";
-	}
-      return "add{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:HI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set (attr "length_immediate")
-      (if_then_else
-	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
-	(const_string "1")
-	(const_string "*")))
-   (set_attr "mode" "HI")])
-
-(define_insn "*addqi_2"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
-		   (match_operand:QI 2 "general_operand" "qmn,qn"))
-	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
-	(plus:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, QImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-	return "inc{b}\t%0";
-      else
-        {
-	  gcc_assert (operands[2] == constm1_rtx
-		      || (CONST_INT_P (operands[2])
-		          && INTVAL (operands[2]) == 255));
-	  return "dec{b}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.  */
-      if (CONST_INT_P (operands[2])
-          && INTVAL (operands[2]) < 0)
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{b}\t{%2, %0|%0, %2}";
-	}
-      return "add{b}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:QI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set_attr "mode" "QI")])
-
-(define_insn "*add<mode>_3"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (neg:SWI48 (match_operand:SWI48 2 "<general_operand>" "<g>"))
-	  (match_operand:SWI48 1 "nonimmediate_operand" "%0")))
-   (clobber (match_scratch:SWI48 0 "=r"))]
-  "ix86_match_ccmode (insn, CCZmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
-   /* Current assemblers are broken and do not allow @GOTOFF in
-      ought but a memory context.  */
-   && ! pic_symbolic_operand (operands[2], VOIDmode)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      if (operands[2] == const1_rtx)
         return "inc{<imodesuffix>}\t%0";
       else
         {
@@ -6574,27 +6170,14 @@
 	}
 
     default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* ???? In DImode, we ought to handle there the 32bit case too
-	 - do we need new constraint?  */
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-	  /* Avoid overflows.  */
-	  && (<MODE>mode != DImode
-	      || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
-     (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
+     (if_then_else (match_operand:SWI 2 "incdec_operand" "")
 	(const_string "incdec")
 	(const_string "alu")))
    (set (attr "length_immediate")
@@ -6613,10 +6196,7 @@
    (set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
-   && ix86_binary_operator_ok (PLUS, SImode, operands)
-   /* Current assemblers are broken and do not allow @GOTOFF in
-      ought but a memory context.  */
-   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+   && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
@@ -6630,16 +6210,9 @@
 	}
 
     default:
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{l}\t{%2, %k0|%k0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], SImode))
+        return "sub{l}\t{%2, %k0|%k0, %2}";
+
       return "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
@@ -6654,96 +6227,10 @@
 	(const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*addhi_3"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
-	  (match_operand:HI 1 "nonimmediate_operand" "%0")))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCZmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-	return "inc{w}\t%0";
-      else
-        {
-	  gcc_assert (operands[2] == constm1_rtx);
-	  return "dec{w}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{w}\t{%2, %0|%0, %2}";
-	}
-      return "add{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:HI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set (attr "length_immediate")
-      (if_then_else
-	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
-	(const_string "1")
-	(const_string "*")))
-   (set_attr "mode" "HI")])
-
-(define_insn "*addqi_3"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
-	  (match_operand:QI 1 "nonimmediate_operand" "%0")))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCZmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-	return "inc{b}\t%0";
-      else
-        {
-	  gcc_assert (operands[2] == constm1_rtx
-		      || (CONST_INT_P (operands[2])
-			  && INTVAL (operands[2]) == 255));
-	  return "dec{b}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.  */
-      if (CONST_INT_P (operands[2])
-          && INTVAL (operands[2]) < 0)
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{b}\t{%2, %0|%0, %2}";
-	}
-      return "add{b}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:QI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set_attr "mode" "QI")])
-
 ; For comparisons against 1, -1 and 128, we may generate better code
 ; by converting cmp to add, inc or dec as done by peephole2.  This pattern
 ; is matched then.  We can't accept general immediate, because for
 ; case of overflows,  the result is messed up.
-; This pattern also don't hold of 0x8000000000000000, since the value
-; overflows when negated.
 ; Also carry flag is reversed compared to cmp, so this conversion is valid
 ; only for comparisons not depending on it.
 
@@ -6768,17 +6255,10 @@
 	}
 
     default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if ((INTVAL (operands[2]) == -128
-	   || (INTVAL (operands[2]) > 0
-	       && INTVAL (operands[2]) != 128))
-	  /* Avoid overflows.  */
-	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
-	return "sub{q}\t{%2, %0|%0, %2}";
-      operands[2] = GEN_INT (-INTVAL (operands[2]));
-      return "add{q}\t{%2, %0|%0, %2}";
+      if (x86_maybe_negate_const_int (&operands[2], DImode))
+	return "add{q}\t{%2, %0|%0, %2}";
+
+      return "sub{q}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
@@ -6796,90 +6276,37 @@
 ; by converting cmp to add, inc or dec as done by peephole2.  This pattern
 ; is matched then.  We can't accept general immediate, because for
 ; case of overflows,  the result is messed up.
-; This pattern also don't hold of 0x80000000, since the value overflows
-; when negated.
 ; Also carry flag is reversed compared to cmp, so this conversion is valid
 ; only for comparisons not depending on it.
 
-(define_insn "*addsi_4"
+(define_insn "*add<mode>_4"
   [(set (reg FLAGS_REG)
 	(compare
-	  (match_operand:SI 1 "nonimmediate_operand" "0")
-	  (match_operand:SI 2 "const_int_operand" "n")))
-   (clobber (match_scratch:SI 0 "=rm"))]
-  "ix86_match_ccmode (insn, CCGCmode)
-   && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+	  (match_operand:SWI124 1 "nonimmediate_operand" "0")
+	  (match_operand:SWI124 2 "const_int_operand" "n")))
+   (clobber (match_scratch:SWI124 0 "=<r>m"))]
+  "ix86_match_ccmode (insn, CCGCmode)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
-        return "inc{l}\t%0";
+        return "inc{<imodesuffix>}\t%0";
       else
         {
 	  gcc_assert (operands[2] == const1_rtx);
-          return "dec{l}\t%0";
+          return "dec{<imodesuffix>}\t%0";
 	}
 
     default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if ((INTVAL (operands[2]) == -128
-	   || (INTVAL (operands[2]) > 0
-	       && INTVAL (operands[2]) != 128)))
-	return "sub{l}\t{%2, %0|%0, %2}";
-      operands[2] = GEN_INT (-INTVAL (operands[2]));
-      return "add{l}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:SI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set (attr "length_immediate")
-      (if_then_else
-	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
-	(const_string "1")
-	(const_string "*")))
-   (set_attr "mode" "SI")])
-
-; See comments above addsi_4 for details.
-
-(define_insn "*addhi_4"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (match_operand:HI 1 "nonimmediate_operand" "0")
-	  (match_operand:HI 2 "const_int_operand" "n")))
-   (clobber (match_scratch:HI 0 "=rm"))]
-  "ix86_match_ccmode (insn, CCGCmode)
-   && (INTVAL (operands[2]) & 0xffff) != 0x8000"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == constm1_rtx)
-        return "inc{w}\t%0";
-      else
-	{
-	  gcc_assert (operands[2] == const1_rtx);
-          return "dec{w}\t%0";
-	}
-
-    default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if ((INTVAL (operands[2]) == -128
-	   || (INTVAL (operands[2]) > 0
-	       && INTVAL (operands[2]) != 128)))
-	return "sub{w}\t{%2, %0|%0, %2}";
-      operands[2] = GEN_INT (-INTVAL (operands[2]));
-      return "add{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+	return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:<MODE> 2 "incdec_operand" "")
 	(const_string "incdec")
 	(const_string "alu")))
    (set (attr "length_immediate")
@@ -6887,66 +6314,22 @@
 	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
 	(const_string "1")
 	(const_string "*")))
-   (set_attr "mode" "HI")])
-
-; See comments above addsi_4 for details.
-
-(define_insn "*addqi_4"
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>_5"
   [(set (reg FLAGS_REG)
 	(compare
-	  (match_operand:QI 1 "nonimmediate_operand" "0")
-	  (match_operand:QI 2 "const_int_operand" "n")))
-   (clobber (match_scratch:QI 0 "=qm"))]
-  "ix86_match_ccmode (insn, CCGCmode)
-   && (INTVAL (operands[2]) & 0xff) != 0x80"
+	  (plus:SWI
+	    (match_operand:SWI 1 "nonimmediate_operand" "%0")
+	    (match_operand:SWI 2 "<general_operand>" "<g>"))
+	  (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (operands[2] == constm1_rtx
-	  || (CONST_INT_P (operands[2])
-	      && INTVAL (operands[2]) == 255))
-        return "inc{b}\t%0";
-      else
-	{
-	  gcc_assert (operands[2] == const1_rtx);
-          return "dec{b}\t%0";
-	}
-
-    default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      if (INTVAL (operands[2]) < 0)
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "add{b}\t{%2, %0|%0, %2}";
-        }
-      return "sub{b}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:HI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set_attr "mode" "QI")])
-
-(define_insn "*add<mode>_5"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (plus:SWI48
-	    (match_operand:SWI48 1 "nonimmediate_operand" "%0")
-	    (match_operand:SWI48 2 "<general_operand>" "<g>"))
-	  (const_int 0)))
-   (clobber (match_scratch:SWI48 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
-   /* Current assemblers are broken and do not allow @GOTOFF in
-      ought but a memory context.  */
-   && ! pic_symbolic_operand (operands[2], VOIDmode)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{<imodesuffix>}\t%0";
       else
@@ -6956,25 +6339,14 @@
 	}
 
     default:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-	  /* Avoid overflows.  */
-	  && (<MODE>mode != DImode
-	      || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-        {
-          operands[2] = GEN_INT (-INTVAL (operands[2]));
-          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
-        }
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
-     (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
+     (if_then_else (match_operand:SWI 2 "incdec_operand" "")
 	(const_string "incdec")
 	(const_string "alu")))
    (set (attr "length_immediate")
@@ -6984,92 +6356,6 @@
 	(const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*addhi_5"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-		   (match_operand:HI 2 "general_operand" "rmn"))
-	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-	return "inc{w}\t%0";
-      else
-	{
-	  gcc_assert (operands[2] == constm1_rtx);
-	  return "dec{w}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
-	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-		  && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{w}\t{%2, %0|%0, %2}";
-	}
-      return "add{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:HI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set (attr "length_immediate")
-      (if_then_else
-	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
-	(const_string "1")
-	(const_string "*")))
-   (set_attr "mode" "HI")])
-
-(define_insn "*addqi_5"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-		   (match_operand:QI 2 "general_operand" "qmn"))
-	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-	return "inc{b}\t%0";
-      else
-        {
-	  gcc_assert (operands[2] == constm1_rtx
-		      || (CONST_INT_P (operands[2])
-			  && INTVAL (operands[2]) == 255));
-	  return "dec{b}\t%0";
-	}
-
-    default:
-      /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.  */
-      if (CONST_INT_P (operands[2])
-          && INTVAL (operands[2]) < 0)
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{b}\t{%2, %0|%0, %2}";
-	}
-      return "add{b}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:QI 2 "incdec_operand" "")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set_attr "mode" "QI")])
-
 (define_insn "*addqi_ext_1_rex64"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
 			 (const_int 8)
@@ -7090,9 +6376,7 @@
 	return "inc{b}\t%h0";
       else
         {
-	  gcc_assert (operands[2] == constm1_rtx
-		      || (CONST_INT_P (operands[2])
-			  && INTVAL (operands[2]) == 255));
+	  gcc_assert (operands[2] == constm1_rtx);
           return "dec{b}\t%h0";
         }
 
@@ -7127,9 +6411,7 @@
 	return "inc{b}\t%h0";
       else
         {
-	  gcc_assert (operands[2] == constm1_rtx
-		      || (CONST_INT_P (operands[2])
-			  && INTVAL (operands[2]) == 255));
+	  gcc_assert (operands[2] == constm1_rtx);
           return "dec{b}\t%h0";
 	}
 
@@ -7326,60 +6608,6 @@
 }
   [(set_attr "type" "lea")
    (set_attr "mode" "SI")])
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(plus:DI (match_operand:DI 1 "register_operand" "")
-		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed 
-   && ix86_lea_for_add_ok (PLUS, insn, operands)"
-  [(set (match_dup 0)
-	(plus:DI (match_dup 1)
-		 (match_dup 2)))]
-  "")
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand 0 "register_operand" "")
-	(plus (match_operand 1 "register_operand" "")
-              (match_operand 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" 
-  [(const_int 0)]
-{
-  rtx pat;
-  /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
-     may confuse gen_lowpart.  */
-  if (GET_MODE (operands[0]) != Pmode)
-    {
-      operands[1] = gen_lowpart (Pmode, operands[1]);
-      operands[2] = gen_lowpart (Pmode, operands[2]);
-    }
-  operands[0] = gen_lowpart (SImode, operands[0]);
-  pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
-  if (Pmode != SImode)
-    pat = gen_rtx_SUBREG (SImode, pat, 0);
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
-  DONE;
-})
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(zero_extend:DI
-	  (plus:SI (match_operand:SI 1 "register_operand" "")
-		   (match_operand:SI 2 "nonmemory_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
-  [(set (match_dup 0)
-	(zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
-{
-  operands[1] = gen_lowpart (Pmode, operands[1]);
-  operands[2] = gen_lowpart (Pmode, operands[2]);
-})
 
 ;; Subtract instructions
 
@@ -7410,7 +6638,7 @@
 		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
 		       (match_dup 5))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);")
+  "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
 
 (define_insn "*sub<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
@@ -7514,8 +6742,7 @@
 			(const_int 0)])
 		      (match_operand:SWI 2 "<general_operand>" ""))))
      (clobber (reg:CC FLAGS_REG))])]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "")
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)")
 
 (define_insn "*<plusminus_insn><mode>3_carry"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
@@ -7569,10 +6796,10 @@
 	(compare:CCC
 	  (plus:SWI
 	    (match_operand:SWI 1 "nonimmediate_operand" "%0")
-	    (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+	    (match_operand:SWI 2 "<general_operand>" "<g>"))
 	  (match_dup 1)))
    (clobber (match_scratch:SWI 0 "=<r>"))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "add{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
@@ -7624,8 +6851,7 @@
 	(plusminus:XF
 	  (match_operand:XF 1 "register_operand" "")
 	  (match_operand:XF 2 "register_operand" "")))]
-  "TARGET_80387"
-  "")
+  "TARGET_80387")
 
 (define_expand "<plusminus_insn><mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "")
@@ -7633,8 +6859,7 @@
 	  (match_operand:MODEF 1 "register_operand" "")
 	  (match_operand:MODEF 2 "nonimmediate_operand" "")))]
   "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
-    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-  "")
+    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
 
 ;; Multiply instructions
 
@@ -7643,9 +6868,7 @@
 		   (mult:SWIM248
 		     (match_operand:SWIM248 1 "register_operand" "")
 		     (match_operand:SWIM248 2 "<general_operand>" "")))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
+	      (clobber (reg:CC FLAGS_REG))])])
 
 (define_expand "mulqi3"
   [(parallel [(set (match_operand:QI 0 "register_operand" "")
@@ -7653,8 +6876,7 @@
 		     (match_operand:QI 1 "register_operand" "")
 		     (match_operand:QI 2 "nonimmediate_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_QIMODE_MATH"
-  "")
+  "TARGET_QIMODE_MATH")
 
 ;; On AMDFAM10
 ;; IMUL reg32/64, reg32/64, imm8 	Direct
@@ -7663,6 +6885,8 @@
 ;; IMUL reg32/64, mem32/64, imm32 	VectorPath
 ;; IMUL reg32/64, reg32/64 		Direct
 ;; IMUL reg32/64, mem32/64 		Direct
+;;
+;; On BDVER1, all above IMULs use DirectPath
 
 (define_insn "*mul<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r,r,r")
@@ -7691,6 +6915,7 @@
 		    (match_operand 1 "memory_operand" ""))
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*mulsi3_1_zext"
@@ -7721,6 +6946,7 @@
 		    (match_operand 1 "memory_operand" ""))
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
 ;; On AMDFAM10
@@ -7730,6 +6956,8 @@
 ;; IMUL reg16, mem16, imm16 	VectorPath
 ;; IMUL reg16, reg16 		Direct
 ;; IMUL reg16, mem16 		Direct
+;;
+;; On BDVER1, all HI MULs use DoublePath
 
 (define_insn "*mulhi3_1"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
@@ -7754,9 +6982,10 @@
 	(cond [(eq_attr "alternative" "0,1")
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set_attr "bdver1_decode" "double")
    (set_attr "mode" "HI")])
 
-;;On AMDFAM10
+;;On AMDFAM10 and BDVER1
 ;; MUL reg8 	Direct
 ;; MUL mem8 	Direct
 
@@ -7775,6 +7004,7 @@
         (const_string "vector")
         (const_string "direct")))
    (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "QI")])
 
 (define_expand "<u>mul<mode><dwi>3"
@@ -7784,9 +7014,7 @@
 		       (match_operand:DWIH 1 "nonimmediate_operand" ""))
 		     (any_extend:<DWI>
 		       (match_operand:DWIH 2 "register_operand" ""))))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
+	      (clobber (reg:CC FLAGS_REG))])])
 
 (define_expand "<u>mulqihi3"
   [(parallel [(set (match_operand:HI 0 "register_operand" "")
@@ -7796,8 +7024,7 @@
 		     (any_extend:HI
 		       (match_operand:QI 2 "register_operand" ""))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_QIMODE_MATH"
-  "")
+  "TARGET_QIMODE_MATH")
 
 (define_insn "*<u>mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
@@ -7816,6 +7043,7 @@
         (const_string "vector")
         (const_string "double")))
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<u>mulqihi3_1"
@@ -7836,6 +7064,7 @@
         (const_string "vector")
         (const_string "direct")))
    (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "QI")])
 
 (define_expand "<s>mul<mode>3_highpart"
@@ -7875,6 +7104,7 @@
         (const_string "vector")
         (const_string "double")))
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "DI")])
 
 (define_insn "*<s>mulsi3_highpart_1"
@@ -7898,6 +7128,7 @@
         (const_string "vector")
         (const_string "double")))
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
 (define_insn "*<s>mulsi3_highpart_zext"
@@ -7921,6 +7152,7 @@
         (const_string "vector")
         (const_string "double")))
    (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
 ;; The patterns that match these are at the end of this file.
@@ -7929,46 +7161,31 @@
   [(set (match_operand:XF 0 "register_operand" "")
 	(mult:XF (match_operand:XF 1 "register_operand" "")
 		 (match_operand:XF 2 "register_operand" "")))]
-  "TARGET_80387"
-  "")
+  "TARGET_80387")
 
 (define_expand "mul<mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "")
 	(mult:MODEF (match_operand:MODEF 1 "register_operand" "")
 		    (match_operand:MODEF 2 "nonimmediate_operand" "")))]
   "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
-    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-  "")
+    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
 
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
   [(set (match_operand:XF 0 "register_operand" "")
 	(div:XF (match_operand:XF 1 "register_operand" "")
 		(match_operand:XF 2 "register_operand" "")))]
-  "TARGET_80387"
-  "")
+  "TARGET_80387")
 
 (define_expand "divdf3"
   [(set (match_operand:DF 0 "register_operand" "")
  	(div:DF (match_operand:DF 1 "register_operand" "")
  		(match_operand:DF 2 "nonimmediate_operand" "")))]
    "(TARGET_80387 && X87_ENABLE_ARITH (DFmode))
-    || (TARGET_SSE2 && TARGET_SSE_MATH)"
-   "")
+    || (TARGET_SSE2 && TARGET_SSE_MATH)")
 
 (define_expand "divsf3"
   [(set (match_operand:SF 0 "register_operand" "")
@@ -7996,9 +7213,61 @@
 		     (match_operand:SWIM248 2 "nonimmediate_operand" "")))
 	      (set (match_operand:SWIM248 3 "register_operand" "")
 		   (mod:SWIM248 (match_dup 1) (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;; 	if (dividend and divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	 else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(div:SWI48 (match_operand:SWI48 2 "register_operand" "")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+	(mod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+
+(define_insn_and_split "divmod<mode>4_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=a")
+	(div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+		   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 1 "register_operand" "=&d")
+	(mod:SWI48 (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+		   (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+	           (div:SWI48 (match_dup 2) (match_dup 3)))
+	      (set (match_dup 1)
+		   (mod:SWI48 (match_dup 2) (match_dup 3)))
+	      (use (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*divmod<mode>4"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
@@ -8009,7 +7278,7 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  "&& reload_completed"
+  "reload_completed"
   [(parallel [(set (match_dup 1)
 		   (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
 	      (clobber (reg:CC FLAGS_REG))])
@@ -8020,7 +7289,7 @@
 	      (use (match_dup 1))
 	      (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
 
   if (<MODE>mode != HImode
       && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
@@ -8048,6 +7317,68 @@
   [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, mod, insn;
+  rtx tmp0, tmp1;
+  
+  tmp0 = gen_reg_rtx (HImode);
+  tmp1 = gen_reg_rtx (HImode);
+
+  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is
+     in AX.  */
+  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
+  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
+
+  /* Extract remainder from AH.  */
+  tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8));
+  insn = emit_move_insn (operands[3], tmp1);
+
+  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Extract quotient from AL.  */
+  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+;; Change div/mod to HImode and extend the second argument to HImode
+;; so that the mode of div/mod matches the mode of the arguments.
+;; Otherwise combine may fail.
+(define_insn "divmodhiqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (truncate:QI
+		(mod:HI (match_operand:HI 1 "register_operand" "0")
+			(sign_extend:HI
+			  (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+	    (const_int 8))
+	  (zero_extend:HI
+	    (truncate:QI
+	      (div:HI (match_dup 1) (sign_extend:HI (match_dup 2)))))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "udivmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (udiv:SWIM248
@@ -8055,9 +7386,48 @@
 		     (match_operand:SWIM248 2 "nonimmediate_operand" "")))
 	      (set (match_operand:SWIM248 3 "register_operand" "")
 		   (umod:SWIM248 (match_dup 1) (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;; 	if (dividend and divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	 else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+	(umod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+
+(define_insn_and_split "udivmod<mode>4_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=a")
+	(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 1 "register_operand" "=&d")
+	(umod:SWI48 (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+		   (udiv:SWI48 (match_dup 2) (match_dup 3)))
+	      (set (match_dup 1)
+		   (umod:SWI48 (match_dup 2) (match_dup 3)))
+	      (use (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*udivmod<mode>4"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
@@ -8068,7 +7438,7 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  "&& reload_completed"
+  "reload_completed"
   [(set (match_dup 1) (const_int 0))
    (parallel [(set (match_dup 0)
 		   (udiv:SWIM248 (match_dup 2) (match_dup 3)))
@@ -8093,6 +7463,63 @@
   [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "udivmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (udiv:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (umod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, mod, insn;
+  rtx tmp0, tmp1;
+  
+  tmp0 = gen_reg_rtx (HImode);
+  tmp1 = gen_reg_rtx (HImode);
+
+  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is
+     in AX.  */
+  emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
+  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
+
+  /* Extract remainder from AH.  */
+  tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
+  tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0);
+  insn = emit_move_insn (operands[3], tmp1);
+
+  mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Extract quotient from AL.  */
+  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+  div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  DONE;
+})
+
+(define_insn "udivmodhiqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (truncate:QI
+		(mod:HI (match_operand:HI 1 "register_operand" "0")
+			(zero_extend:HI
+			  (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+	    (const_int 8))
+	  (zero_extend:HI
+	    (truncate:QI
+	      (div:HI (match_dup 1) (zero_extend:HI (match_dup 2)))))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 ;; We cannot use div/idiv for double division, because it causes
 ;; "division by zero" on the overflow and that's not what we expect
 ;; from truncate.  Because true (non truncating) double division is
@@ -8122,17 +7549,21 @@
 	(compare:CCNO
 	  (and:SI (match_operand:SI 0 "nonimmediate_operand" "")
 		  (match_operand:SI 1 "nonmemory_operand" ""))
-	  (const_int 0)))]
-  ""
-  "")
+	  (const_int 0)))])
 
 (define_expand "testqi_ccz_1"
   [(set (reg:CCZ FLAGS_REG)
         (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "")
 			     (match_operand:QI 1 "nonmemory_operand" ""))
-		 (const_int 0)))]
-  ""
-  "")
+		 (const_int 0)))])
+
+(define_expand "testdi_ccno_1"
+  [(set (reg:CCNO FLAGS_REG)
+	(compare:CCNO
+	  (and:DI (match_operand:DI 0 "nonimmediate_operand" "")
+		  (match_operand:DI 1 "x86_64_szext_general_operand" ""))
+	  (const_int 0)))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
 
 (define_insn "*testdi_1"
   [(set (reg FLAGS_REG)
@@ -8202,9 +7633,7 @@
 	      (const_int 8)
 	      (const_int 8))
 	    (match_operand 1 "const_int_operand" ""))
-	  (const_int 0)))]
-  ""
-  "")
+	  (const_int 0)))])
 
 (define_insn "*testqi_ext_0"
   [(set (reg FLAGS_REG)
@@ -8879,7 +8308,7 @@
 	 (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
@@ -8891,9 +8320,9 @@
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, QImode, operands)"
   "@
-   <logicprefix>{b}\t{%2, %0|%0, %2}
-   <logicprefix>{b}\t{%2, %0|%0, %2}
-   <logicprefix>{l}\t{%k2, %k0|%k0, %k2}"
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{l}\t{%k2, %k0|%k0, %k2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI,QI,SI")])
 
@@ -8905,7 +8334,7 @@
 		    (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
@@ -8916,7 +8345,7 @@
 	 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
@@ -8927,7 +8356,7 @@
    (clobber (reg:CC FLAGS_REG))]
   "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "<logicprefix>{b}\t{%1, %0|%0, %1}"
+  "<logic>{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
@@ -8941,7 +8370,7 @@
 	(any_or:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
@@ -8956,7 +8385,7 @@
 	(zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
@@ -8970,7 +8399,7 @@
 	(any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
@@ -8984,7 +8413,7 @@
   "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "<logicprefix>{b}\t{%1, %0|%0, %1}"
+  "<logic>{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
@@ -8996,8 +8425,8 @@
 		 (const_int 0)))
    (clobber (match_scratch:SWI 0 "=<r>"))]
   "ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
@@ -9013,7 +8442,7 @@
 	  (match_operand 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "1")
    (set_attr "modrm" "1")
@@ -9033,7 +8462,7 @@
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
@@ -9052,7 +8481,7 @@
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
@@ -9070,7 +8499,7 @@
 			   (const_int 8))))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "<logicprefix>{b}\t{%h2, %h0|%h0, %h2}"
+  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
@@ -9134,9 +8563,7 @@
 	     (match_dup 1)
 	     (const_int 8)
 	     (const_int 8))
-	    (match_dup 2)))])]
-  ""
-  "")
+	    (match_dup 2)))])])
 
 (define_insn "*xorqi_cc_ext_1_rex64"
   [(set (reg FLAGS_REG)
@@ -9217,7 +8644,7 @@
     [(set (match_dup 2)
 	  (neg:DWIH (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
-  "split_<dwi> (&operands[0], 2, &operands[0], &operands[2]);")
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
 
 (define_insn "*neg<mode>2_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
@@ -9460,7 +8887,7 @@
   "TARGET_80387
    && (reload_completed
        || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "<MODE>")])
 
@@ -9469,7 +8896,7 @@
 	(absneg:DF (float_extend:DF
 		     (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "DF")])
 
@@ -9478,16 +8905,16 @@
 	(absneg:XF (float_extend:XF
 		     (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
 (define_insn "*<code>extenddfxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(absneg:XF (float_extend:XF
-		      (match_operand:DF 1 "register_operand" "0"))))]
+		     (match_operand:DF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
@@ -9502,10 +8929,7 @@
    (match_operand:CSGNMODE 2 "register_operand" "")]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE2 && (<MODE>mode == TFmode))"
-{
-  ix86_expand_copysign (operands);
-  DONE;
-})
+  "ix86_expand_copysign (operands); DONE;")
 
 (define_insn_and_split "copysign<mode>3_const"
   [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
@@ -9519,10 +8943,7 @@
   "#"
   "&& reload_completed"
   [(const_int 0)]
-{
-  ix86_split_copysign_const (operands);
-  DONE;
-})
+  "ix86_split_copysign_const (operands); DONE;")
 
 (define_insn "copysign<mode>3_var"
   [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
@@ -9550,10 +8971,7 @@
     || (TARGET_SSE2 && (<MODE>mode == TFmode)))
    && reload_completed"
   [(const_int 0)]
-{
-  ix86_split_copysign_var (operands);
-  DONE;
-})
+  "ix86_split_copysign_var (operands); DONE;")
 
 ;; One complement instructions
 
@@ -9616,8 +9034,7 @@
 		   (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
 				    (const_int 0)]))
 	      (set (match_dup 1)
-		   (xor:SWI (match_dup 3) (const_int -1)))])]
-  "")
+		   (xor:SWI (match_dup 3) (const_int -1)))])])
 
 ;; ??? Currently never generated - xor is used instead.
 (define_insn "*one_cmplsi2_2_zext"
@@ -9644,10 +9061,9 @@
 		   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
 				    (const_int 0)]))
 	      (set (match_dup 1)
-		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]
-  "")
+		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
 
-;; Arithmetic shift instructions
+;; Shift instructions
 
 ;; DImode shifts are implemented using the i386 "shift double" opcode,
 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem".  If the shift count
@@ -9671,42 +9087,46 @@
 ;; shift pair, instead using moves and sign extension for counts greater
 ;; than 31.
 
-(define_expand "ashlti3"
-  [(set (match_operand:TI 0 "register_operand" "")
-	(ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;")
-
-(define_insn "*ashlti3_1"
-  [(set (match_operand:TI 0 "register_operand" "=&r,r")
-	(ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0")
-		   (match_operand:QI 2 "nonmemory_operand" "Oc,Oc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
+(define_expand "ashl<mode>3"
+  [(set (match_operand:SDWIM 0 "<shift_operand>" "")
+	(ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>" "")
+		      (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
+
+(define_insn "*ashl<mode>3_doubleword"
+  [(set (match_operand:DWI 0 "register_operand" "=&r,r")
+	(ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "n,0")
+		    (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
   "#"
   [(set_attr "type" "multi")])
 
-(define_peephole2
-  [(match_scratch:DI 3 "r")
-   (parallel [(set (match_operand:TI 0 "register_operand" "")
-		   (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
-			      (match_operand:QI 2 "nonmemory_operand" "")))
+(define_split
+  [(set (match_operand:DWI 0 "register_operand" "")
+	(ashift:DWI (match_operand:DWI 1 "nonmemory_operand" "")
+		    (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+
+(define_peephole2
+  [(match_scratch:DWIH 3 "r")
+   (parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+		   (ashift:<DWI>
+		     (match_operand:<DWI> 1 "nonmemory_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
    (match_dup 3)]
-  "TARGET_64BIT"
-  [(const_int 0)]
-  "ix86_split_ashl (operands, operands[3], TImode); DONE;")
-
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-	(ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		    ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
+  "TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
 
 (define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
@@ -9721,34 +9141,53 @@
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")])
-
-(define_expand "x86_64_shift_adj_1"
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+		(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+		  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_expand "x86_shift<mode>_adj_1"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
-			     (const_int 64))
+			     (match_dup 4))
 		     (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "")
-        (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
-			 (match_operand:DI 1 "register_operand" "")
-			 (match_dup 0)))
+   (set (match_operand:SWI48 0 "register_operand" "")
+        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			    (match_operand:SWI48 1 "register_operand" "")
+			    (match_dup 0)))
    (set (match_dup 1)
-	(if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
-			 (match_operand:DI 3 "register_operand" "r")
-			 (match_dup 1)))]
-  "TARGET_64BIT"
-  "")
-
-(define_expand "x86_64_shift_adj_2"
-  [(use (match_operand:DI 0 "register_operand" ""))
-   (use (match_operand:DI 1 "register_operand" ""))
+	(if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			    (match_operand:SWI48 3 "register_operand" "r")
+			    (match_dup 1)))]
+  "TARGET_CMOVE"
+  "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
+
+(define_expand "x86_shift<mode>_adj_2"
+  [(use (match_operand:SWI48 0 "register_operand" ""))
+   (use (match_operand:SWI48 1 "register_operand" ""))
    (use (match_operand:QI 2 "register_operand" ""))]
-  "TARGET_64BIT"
+  ""
 {
   rtx label = gen_label_rtx ();
   rtx tmp;
 
-  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+  emit_insn (gen_testqi_ccz_1 (operands[2],
+			       GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
 
   tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
   tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
@@ -9767,42 +9206,56 @@
   DONE;
 })
 
-(define_expand "ashldi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-	(ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
-
-(define_insn "*ashldi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
-	(ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cJ,M")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+;; Avoid useless masking of count operand.
+(define_insn_and_split "*ashl<mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+	(ashift:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "nonimmediate_operand" "c")
+	      (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (ashift:SWI48 (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (can_create_pseudo_p ())
+    operands [2] = force_reg (SImode, operands[2]);
+
+  operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+}
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_LEA:
+      return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      return "add{q}\t%0, %0";
-
-    case TYPE_LEA:
-      gcc_assert (CONST_INT_P (operands[2]));
-      gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3);
-      operands[1] = gen_rtx_MULT (DImode, operands[1],
-				  GEN_INT (1 << INTVAL (operands[2])));
-      return "lea{q}\t{%a1, %0|%0, %a1}";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{q}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{q}\t%0";
-      else
-	return "sal{q}\t{%2, %0|%0, %2}";
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{<imodesuffix>}\t%0";
+      else
+	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
@@ -9824,343 +9277,28 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "DI")])
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(ashift:DI (match_operand:DI 1 "index_register_operand" "")
-		   (match_operand:QI 2 "immediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
-  [(set (match_dup 0)
-	(mult:DI (match_dup 1)
-		 (match_dup 2)))]
-  "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);")
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashldi3_cmp_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_63_operand" "J"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(ashift:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun)
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-	   && (TARGET_SHIFT1
-	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{q}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{q}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{q}\t%0";
-      else
-	return "sal{q}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashldi3_cconly_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_63_operand" "J"))
-	  (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun)
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-	   && (TARGET_SHIFT1
-	       || TARGET_DOUBLE_WITH_ADD)))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{q}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{q}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{q}\t%0";
-      else
-	return "sal{q}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashldi3_1"
-  [(set (match_operand:DI 0 "register_operand" "=&r,r")
-	(ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
-		   (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
-
-;; By default we don't ask for a scratch register, because when DImode
-;; values are manipulated, registers are already at a premium.  But if
-;; we have one handy, we won't turn it away.
-(define_peephole2
-  [(match_scratch:SI 3 "r")
-   (parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
-			      (match_operand:QI 2 "nonmemory_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_dup 3)]
-  "!TARGET_64BIT && TARGET_CMOVE"
-  [(const_int 0)]
-  "ix86_split_ashl (operands, operands[3], DImode); DONE;")
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		     ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
-
-(define_insn "x86_shld"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
-        (ior:SI (ashift:SI (match_dup 0)
-		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
-		(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
-		  (minus:QI (const_int 32) (match_dup 2)))))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")
-   (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")])
-
-(define_expand "x86_shift_adj_1"
-  [(set (reg:CCZ FLAGS_REG)
-	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
-			     (const_int 32))
-		     (const_int 0)))
-   (set (match_operand:SI 0 "register_operand" "")
-        (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
-			 (match_operand:SI 1 "register_operand" "")
-			 (match_dup 0)))
-   (set (match_dup 1)
-	(if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
-			 (match_operand:SI 3 "register_operand" "r")
-			 (match_dup 1)))]
-  "TARGET_CMOVE"
-  "")
-
-(define_expand "x86_shift_adj_2"
-  [(use (match_operand:SI 0 "register_operand" ""))
-   (use (match_operand:SI 1 "register_operand" ""))
-   (use (match_operand:QI 2 "register_operand" ""))]
-  ""
-{
-  rtx label = gen_label_rtx ();
-  rtx tmp;
-
-  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
-
-  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
-  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, label),
-			      pc_rtx);
-  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-  JUMP_LABEL (tmp) = label;
-
-  emit_move_insn (operands[0], operands[1]);
-  ix86_expand_clear (operands[1]);
-
-  emit_label (label);
-  LABEL_NUSES (label) = 1;
-
-  DONE;
-})
-
-(define_expand "ashlsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;")
-
-(define_insn "*ashlsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, SImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      return "add{l}\t%0, %0";
-
-    case TYPE_LEA:
-      return "#";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{l}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{l}\t%0";
-      else
-	return "sal{l}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(eq_attr "alternative" "1")
-	      (const_string "lea")
-            (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "SI")])
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand 0 "register_operand" "")
-	(ashift (match_operand 1 "index_register_operand" "")
-                (match_operand:QI 2 "const_int_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])
-   && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4"
-  [(const_int 0)]
-{
-  rtx pat;
-  enum machine_mode mode = GET_MODE (operands[0]);
-
-  if (GET_MODE_SIZE (mode) < 4)
-    operands[0] = gen_lowpart (SImode, operands[0]);
-  if (mode != Pmode)
-    operands[1] = gen_lowpart (Pmode, operands[1]);
-  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
-
-  pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
-  if (Pmode != SImode)
-    pat = gen_rtx_SUBREG (SImode, pat, 0);
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
-  DONE;
-})
-
-;; Rare case of shifting RSP is handled by generating move and shift
-(define_split
-  [(set (match_operand 0 "register_operand" "")
-	(ashift (match_operand 1 "register_operand" "")
-                (match_operand:QI 2 "const_int_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
-  [(const_int 0)]
-{
-  rtx pat, clob;
-  emit_move_insn (operands[0], operands[1]);
-  pat = gen_rtx_SET (VOIDmode, operands[0],
-		     gen_rtx_ASHIFT (GET_MODE (operands[0]),
-				     operands[0], operands[2]));
-  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob)));
-  DONE;
-})
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashlsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-			(match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_LEA:
+      return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       return "add{l}\t%k0, %k0";
 
-    case TYPE_LEA:
-      return "#";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{l}\t{%b2, %k0|%k0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
 	return "sal{l}\t%k0";
       else
 	return "sal{l}\t{%2, %k0|%k0, %2}";
@@ -10186,55 +9324,26 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(zero_extend:DI (ashift (match_operand 1 "register_operand" "")
-				(match_operand:QI 2 "const_int_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed
-   && true_regnum (operands[0]) != true_regnum (operands[1])"
-  [(set (match_dup 0) (zero_extend:DI
-			(subreg:SI (mult:SI (match_dup 1)
-					    (match_dup 2)) 0)))]
-{
-  operands[1] = gen_lowpart (Pmode, operands[1]);
-  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
-})
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashlsi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(ashift:SI (match_dup 1) (match_dup 2)))]
-   "(optimize_function_for_size_p (cfun)
-     || !TARGET_PARTIAL_FLAG_REG_STALL
-     || (operands[2] == const1_rtx
-	 && (TARGET_SHIFT1
-	     || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+(define_insn "*ashlhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      return "add{l}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{l}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{l}\t%0";
-      else
-	return "sal{l}\t{%2, %0|%0, %2}";
+      return "add{w}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{w}\t%0";
+      else
+	return "sal{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
@@ -10254,115 +9363,7 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashlsi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun)
-    || !TARGET_PARTIAL_FLAG_REG_STALL
-    || (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1
-	    || TARGET_DOUBLE_WITH_ADD)))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{l}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{l}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{l}\t%0";
-      else
-	return "sal{l}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashlsi3_cmp_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun)
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-	   && (TARGET_SHIFT1
-	       || TARGET_DOUBLE_WITH_ADD)))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{l}\t%k0, %k0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{l}\t{%b2, %k0|%k0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{l}\t%k0";
-      else
-	return "sal{l}\t{%2, %k0|%k0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		     (const_int 0))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "SI")])
-
-(define_expand "ashlhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;")
+   (set_attr "mode" "HI")])
 
 (define_insn "*ashlhi3_1_lea"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
@@ -10376,15 +9377,14 @@
     {
     case TYPE_LEA:
       return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       return "add{w}\t%0, %0";
 
     default:
-      if (REG_P (operands[2]))
-	return "sal{w}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
 	return "sal{w}\t%0";
       else
 	return "sal{w}\t{%2, %0|%0, %2}";
@@ -10411,28 +9411,39 @@
        (const_string "*")))
    (set_attr "mode" "HI,SI")])
 
-(define_insn "*ashlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		   (match_operand:QI 2 "nonmemory_operand" "cI")))
+(define_insn "*ashlqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_PARTIAL_REG_STALL
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      return "add{w}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{w}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{w}\t%0";
-      else
-	return "sal{w}\t{%2, %0|%0, %2}";
+      if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+        return "add{l}\t%k0, %k0";
+      else
+        return "add{b}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t%k0";
+	  else
+	    return "sal{b}\t%0";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t{%2, %k0|%k0, %2}";
+	  else
+	    return "sal{b}\t{%2, %0|%0, %2}";
+	}
     }
 }
   [(set (attr "type")
@@ -10452,121 +9463,9 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "HI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashlhi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(ashift:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun)
-    || !TARGET_PARTIAL_FLAG_REG_STALL
-    || (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1
-	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{w}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{w}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{w}\t%0";
-      else
-	return "sal{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "HI")])
-
-(define_insn "*ashlhi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun)
-    || !TARGET_PARTIAL_FLAG_REG_STALL
-    || (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1
-	    || TARGET_DOUBLE_WITH_ADD)))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      gcc_assert (operands[2] == const1_rtx);
-      return "add{w}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{w}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{w}\t%0";
-      else
-	return "sal{w}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
-		 (match_operand 2 "const1_operand" ""))
-	      (const_string "alu")
-	   ]
-	   (const_string "ishift")))
-   (set (attr "length_immediate")
-     (if_then_else
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "ishift")
-		 (and (match_operand 2 "const1_operand" "")
-		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-			  (const_int 0)))))
-       (const_string "0")
-       (const_string "*")))
-   (set_attr "mode" "HI")])
-
-(define_expand "ashlqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;")
+   (set_attr "mode" "QI,SI")])
 
 ;; %%% Potential partial reg stall on alternative 2.  What to do?
-
 (define_insn "*ashlqi3_1_lea"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r")
 	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
@@ -10579,6 +9478,7 @@
     {
     case TYPE_LEA:
       return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
@@ -10587,18 +9487,11 @@
         return "add{b}\t%0, %0";
 
     default:
-      if (REG_P (operands[2]))
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
 	{
 	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t{%b2, %k0|%k0, %b2}";
-	  else
-	    return "sal{b}\t{%b2, %0|%0, %b2}";
-	}
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	{
-	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t%0";
+	    return "sal{l}\t%k0";
 	  else
 	    return "sal{b}\t%0";
 	}
@@ -10632,46 +9525,126 @@
        (const_string "*")))
    (set_attr "mode" "QI,SI,SI")])
 
-(define_insn "*ashlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
-	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_PARTIAL_REG_STALL
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+(define_insn "*ashlqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(ashift:QI (match_dup 0)
+		   (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[1] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[1] == const1_rtx);
+      return "add{b}\t%0, %0";
+
+    default:
+      if (operands[1] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{b}\t%0";
+      else
+	return "sal{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 1 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift1")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+	    (and (eq_attr "type" "ishift1")
+		 (and (match_operand 1 "const1_operand" "")
+		      (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+			  (const_int 0)))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ashift (match_operand 1 "index_register_operand" "")
+                (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  rtx pat;
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  if (mode != Pmode)
+    operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+  pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
+
+  if (TARGET_64BIT && mode != Pmode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "index_register_operand" "")
+		     (match_operand:QI 2 "const_int_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (mult:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+  operands[1] = gen_lowpart (DImode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashl<mode>3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+	  (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(ashift:SWI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
-        return "add{l}\t%k0, %k0";
-      else
-        return "add{b}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	{
-	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t{%b2, %k0|%k0, %b2}";
-	  else
-	    return "sal{b}\t{%b2, %0|%0, %b2}";
-	}
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	{
-	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t%0";
-	  else
-	    return "sal{b}\t%0";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t{%2, %k0|%k0, %2}";
-	  else
-	    return "sal{b}\t{%2, %0|%0, %2}";
-	}
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{<imodesuffix>}\t%0";
+      else
+	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
@@ -10691,47 +9664,42 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI,SI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashlqi3_cmp"
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashlsi3_cmp_zext"
   [(set (reg FLAGS_REG)
 	(compare
-	  (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+	  (ashift:SI (match_operand:SI 1 "register_operand" "0")
 		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
 	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(ashift:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun)
-    || !TARGET_PARTIAL_FLAG_REG_STALL
-    || (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1
-	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      return "add{b}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{b}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{b}\t%0";
-      else
-	return "sal{b}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
-		          (const_int 0))
-		      (match_operand 0 "register_operand" ""))
+      return "add{l}\t%k0, %k0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%k0";
+      else
+	return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
 	      (const_string "alu")
 	   ]
@@ -10745,37 +9713,34 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashlqi3_cconly"
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashl<mode>3_cconly"
   [(set (reg FLAGS_REG)
 	(compare
-	  (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
+		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
 	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
+   (clobber (match_scratch:SWI 0 "=<r>"))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
 	&& (TARGET_SHIFT1
 	    || TARGET_DOUBLE_WITH_ADD)))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+   && ix86_match_ccmode (insn, CCGOCmode)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      return "add{b}\t%0, %0";
-
-    default:
-      if (REG_P (operands[2]))
-	return "sal{b}\t{%b2, %0|%0, %b2}";
-      else if (operands[2] == const1_rtx
-	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "sal{b}\t%0";
-      else
-	return "sal{b}\t{%2, %0|%0, %2}";
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{<imodesuffix>}\t%0";
+      else
+	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")
@@ -10795,46 +9760,71 @@
 			  (const_int 0)))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI")])
-
-;; See comment above `ashldi3' about how this works.
-
-(define_expand "ashrti3"
-  [(set (match_operand:TI 0 "register_operand" "")
-	(ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;")
-
-(define_insn "*ashrti3_1"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-	(ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
-		     (match_operand:QI 2 "nonmemory_operand" "Oc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
+   (set_attr "mode" "<MODE>")])
+
+;; See comment above `ashl<mode>3' about how this works.
+
+(define_expand "<shiftrt_insn><mode>3"
+  [(set (match_operand:SDWIM 0 "<shift_operand>" "")
+	(any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>" "")
+			   (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn_and_split "*<shiftrt_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "nonimmediate_operand" "c")
+	      (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (can_create_pseudo_p ())
+    operands [2] = force_reg (SImode, operands[2]);
+
+  operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+}
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<shiftrt_insn><mode>3_doubleword"
+  [(set (match_operand:DWI 0 "register_operand" "=r")
+	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
+			 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+  [(const_int 0)]
+  "ix86_split_<shiftrt_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
   [(set_attr "type" "multi")])
 
-(define_peephole2
-  [(match_scratch:DI 3 "r")
-   (parallel [(set (match_operand:TI 0 "register_operand" "")
-		   (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-			        (match_operand:QI 2 "nonmemory_operand" "")))
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+
+(define_peephole2
+  [(match_scratch:DWIH 3 "r")
+   (parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+		   (any_shiftrt:<DWI>
+		     (match_operand:<DWI> 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
    (match_dup 3)]
-  "TARGET_64BIT"
-  [(const_int 0)]
-  "ix86_split_ashr (operands, operands[3], TImode); DONE;")
-
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-	(ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		    ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
+  "TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_<shiftrt_insn> (operands, operands[3], <DWI>mode); DONE;")
 
 (define_insn "x86_64_shrd"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
@@ -10849,187 +9839,8 @@
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
    (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")])
-
-(define_expand "ashrdi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-	(ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
-
-(define_expand "x86_64_shift_adj_3"
-  [(use (match_operand:DI 0 "register_operand" ""))
-   (use (match_operand:DI 1 "register_operand" ""))
-   (use (match_operand:QI 2 "register_operand" ""))]
-  ""
-{
-  rtx label = gen_label_rtx ();
-  rtx tmp;
-
-  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
-
-  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
-  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, label),
-			      pc_rtx);
-  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-  JUMP_LABEL (tmp) = label;
-
-  emit_move_insn (operands[0], operands[1]);
-  emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63)));
-
-  emit_label (label);
-  LABEL_NUSES (label) = 1;
-
-  DONE;
-})
-
-(define_insn "ashrdi3_63_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
-	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
-		     (match_operand:DI 2 "const_int_operand" "i,i")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && INTVAL (operands[2]) == 63
-   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "@
-   {cqto|cqo}
-   sar{q}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashrdi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "sar{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashrdi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "@
-   sar{q}\t{%2, %0|%0, %2}
-   sar{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrdi3_one_bit_cmp_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "sar{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashrdi3_one_bit_cconly_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "sar{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrdi3_cmp_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "sar{q}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashrdi3_cconly_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
-	  (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
-  "sar{q}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-(define_insn "*ashrdi3_1"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
-		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
-
-;; By default we don't ask for a scratch register, because when DImode
-;; values are manipulated, registers are already at a premium.  But if
-;; we have one handy, we won't turn it away.
-(define_peephole2
-  [(match_scratch:SI 3 "r")
-   (parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
-			        (match_operand:QI 2 "nonmemory_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_dup 3)]
-  "!TARGET_64BIT && TARGET_CMOVE"
-  [(const_int 0)]
-  "ix86_split_ashr (operands, operands[3], DImode); DONE;")
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		     ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
 
 (define_insn "x86_shrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
@@ -11042,48 +9853,33 @@
   "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
    (set_attr "pent_pair" "np")
-   (set_attr "mode" "SI")])
-
-(define_expand "x86_shift_adj_3"
-  [(use (match_operand:SI 0 "register_operand" ""))
-   (use (match_operand:SI 1 "register_operand" ""))
-   (use (match_operand:QI 2 "register_operand" ""))]
-  ""
-{
-  rtx label = gen_label_rtx ();
-  rtx tmp;
-
-  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
-
-  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
-  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, label),
-			      pc_rtx);
-  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-  JUMP_LABEL (tmp) = label;
-
-  emit_move_insn (operands[0], operands[1]);
-  emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31)));
-
-  emit_label (label);
-  LABEL_NUSES (label) = 1;
-
-  DONE;
-})
-
-(define_expand "ashrsi3_31"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
-	           (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
-		                (match_operand:SI 2 "const_int_operand" "i,i")))
-              (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_insn "*ashrsi3_31"
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "ashrdi3_cvt"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+		     (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 63
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "@
+   {cqto|cqo}
+   sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "DI")])
+
+(define_insn "ashrsi3_cvt"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
 	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
-		     (match_operand:SI 2 "const_int_operand" "i,i")))
+		     (match_operand:QI 2 "const_int_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "INTVAL (operands[2]) == 31
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
@@ -11097,13 +9893,14 @@
    (set_attr "modrm" "0,1")
    (set_attr "mode" "SI")])
 
-(define_insn "*ashrsi3_31_zext"
+(define_insn "*ashrsi3_cvt_zext"
   [(set (match_operand:DI 0 "register_operand" "=*d,r")
-	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
-				     (match_operand:SI 2 "const_int_operand" "i,i"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && INTVAL (operands[2]) == 31
+	(zero_extend:DI
+	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+		       (match_operand:QI 2 "const_int_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "@
    {cltd|cdq}
@@ -11114,1380 +9911,414 @@
    (set_attr "modrm" "0,1")
    (set_attr "mode" "SI")])
 
-(define_expand "ashrsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
-
-(define_insn "*ashrsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_1_one_bit_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
-				     (match_operand:QI 2 "const1_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t%k0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "@
-   sar{l}\t{%2, %0|%0, %2}
-   sar{l}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0")
-				     (match_operand:QI 2 "nonmemory_operand" "I,c"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "@
-   sar{l}\t{%2, %k0|%k0, %2}
-   sar{l}\t{%b2, %k0|%k0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrsi3_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_one_bit_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_one_bit_cmp_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t%k0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrsi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*ashrsi3_cmp_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "sar{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_expand "ashrhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;")
-
-(define_insn "*ashrhi3_1_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "sar{w}\t%0"
+(define_expand "x86_shift<mode>_adj_3"
+  [(use (match_operand:SWI48 0 "register_operand" ""))
+   (use (match_operand:SWI48 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2],
+			       GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
+				  GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{<imodesuffix>}\t%0";
+  else
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*ashrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "@
-   sar{w}\t{%2, %0|%0, %2}
-   sar{w}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrhi3_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "sar{w}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*ashrhi3_one_bit_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "sar{w}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrhi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "sar{w}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-(define_insn "*ashrhi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
-  "sar{w}\t{%2, %0|%0, %2}"
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{l}\t%k0";
+  else
+    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+}
   [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-(define_expand "ashrqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;")
-
-(define_insn "*ashrqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashrqi3_1_one_bit_slp"
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-	(ashiftrt:QI (match_dup 0)
-		     (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t%0"
+	(any_shiftrt:QI (match_dup 0)
+			(match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+	&& TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{b}\t%0";
+  else
+    return "<shiftrt>{b}\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "ishift1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "@
-   sar{b}\t{%2, %0|%0, %2}
-   sar{b}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashrqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-	(ashiftrt:QI (match_dup 0)
-		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   sar{b}\t{%1, %0|%0, %1}
-   sar{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "ishift1")
-   (set_attr "mode" "QI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*ashrqi3_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashrqi3_one_bit_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
    (set_attr "mode" "QI")])
 
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
-(define_insn "*ashrqi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
-
-(define_insn "*ashrqi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
-  "sar{b}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
-
-
-;; Logical shift instructions
-
-;; See comment above `ashldi3' about how this works.
-
-(define_expand "lshrti3"
-  [(set (match_operand:TI 0 "register_operand" "")
-	(lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;")
-
-(define_insn "*lshrti3_1"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-	(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
-		     (match_operand:QI 2 "nonmemory_operand" "Oc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
-
-(define_peephole2
-  [(match_scratch:DI 3 "r")
-   (parallel [(set (match_operand:TI 0 "register_operand" "")
-		   (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-			        (match_operand:QI 2 "nonmemory_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_dup 3)]
-  "TARGET_64BIT"
-  [(const_int 0)]
-  "ix86_split_lshr (operands, operands[3], TImode); DONE;")
-
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-	(lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		    ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
-
-(define_expand "lshrdi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-	(lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
-
-(define_insn "*lshrdi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*lshrdi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-	(lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "@
-   shr{q}\t{%2, %0|%0, %2}
-   shr{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrdi3_cmp_one_bit_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*lshrdi3_cconly_one_bit_rex64"
+(define_insn "*<shiftrt_insn><mode>3_cmp"
   [(set (reg FLAGS_REG)
 	(compare
-	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{q}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrdi3_cmp_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (any_shiftrt:SWI
+	    (match_operand:SWI 1 "nonimmediate_operand" "0")
+	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
 	  (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{q}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-(define_insn "*lshrdi3_cconly_rex64"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
-	  (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{q}\t{%2, %0|%0, %2}"
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{<imodesuffix>}\t%0";
+  else
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "ishift")
-   (set_attr "mode" "DI")])
-
-(define_insn "*lshrdi3_1"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
-		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
-
-;; By default we don't ask for a scratch register, because when DImode
-;; values are manipulated, registers are already at a premium.  But if
-;; we have one handy, we won't turn it away.
-(define_peephole2
-  [(match_scratch:SI 3 "r")
-   (parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
-			        (match_operand:QI 2 "nonmemory_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_dup 3)]
-  "!TARGET_64BIT && TARGET_CMOVE"
-  [(const_int 0)]
-  "ix86_split_lshr (operands, operands[3], DImode); DONE;")
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		     ? epilogue_completed : reload_completed)"
-  [(const_int 0)]
-  "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
-
-(define_expand "lshrsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;")
-
-(define_insn "*lshrsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_1_one_bit_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t%k0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "@
-   shr{l}\t{%2, %0|%0, %2}
-   shr{l}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI
-	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-		       (match_operand:QI 2 "nonmemory_operand" "I,c"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "@
-   shr{l}\t{%2, %k0|%k0, %2}
-   shr{l}\t{%b2, %k0|%k0, %b2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrsi3_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_one_bit_cconly"
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<shiftrt_insn>si3_cmp_zext"
   [(set (reg FLAGS_REG)
 	(compare
-	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_cmp_one_bit_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t%k0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrsi3_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_cconly"
-  [(set (reg FLAGS_REG)
-      (compare
-	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
-        (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_insn "*lshrsi3_cmp_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+			  (match_operand:QI 2 "const_1_to_31_operand" "I"))
 	  (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
-	(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
-   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "SI")])
-
-(define_expand "lshrhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
-
-(define_insn "*lshrhi3_1_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{w}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*lshrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "@
-   shr{w}\t{%2, %0|%0, %2}
-   shr{w}\t{%b2, %0|%0, %b2}"
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{l}\t%k0";
+  else
+    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+}
   [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrhi3_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{w}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*lshrhi3_one_bit_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{w}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrhi3_cmp"
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn><mode>3_cconly"
   [(set (reg FLAGS_REG)
 	(compare
-	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{w}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-(define_insn "*lshrhi3_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (any_shiftrt:SWI
+	    (match_operand:SWI 1 "register_operand" "0")
+	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
 	  (const_int 0)))
-   (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
-  "shr{w}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "HI")])
-
-(define_expand "lshrqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;")
-
-(define_insn "*lshrqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "shr{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*lshrqi3_1_one_bit_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-	(lshiftrt:QI (match_dup 0)
-		     (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
-  "shr{b}\t%0"
-  [(set_attr "type" "ishift1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*lshrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "@
-   shr{b}\t{%2, %0|%0, %2}
-   shr{b}\t{%b2, %0|%0, %b2}"
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& TARGET_SHIFT1))
+   && ix86_match_ccmode (insn, CCGOCmode)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shiftrt>{<imodesuffix>}\t%0";
+  else
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
-
-(define_insn "*lshrqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-	(lshiftrt:QI (match_dup 0)
-		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   shr{b}\t{%1, %0|%0, %1}
-   shr{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "ishift1")
-   (set_attr "mode" "QI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrqi2_one_bit_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "shr{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*lshrqi2_one_bit_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))
-	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "shr{b}\t%0"
-  [(set_attr "type" "ishift")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-;; This pattern can't accept a variable shift count, since shifts by
-;; zero don't affect the flags.  We assume that shifts by constant
-;; zero are optimized away.
-(define_insn "*lshrqi2_cmp"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "shr{b}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
-
-(define_insn "*lshrqi2_cconly"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
-	  (const_int 0)))
-   (clobber (match_scratch:QI 0 "=q"))]
-  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
-   && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
-  "shr{b}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ishift")
-   (set_attr "mode" "QI")])
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
 
 ;; Rotate instructions
 
-(define_expand "rotldi3"
+(define_expand "<rotate_insn>ti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(any_rotate:TI (match_operand:TI 1 "register_operand" "")
+		       (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_64BIT"
+{
+  if (const_1_to_63_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>ti3_doubleword
+		(operands[0], operands[1], operands[2]));
+  else
+    FAIL;
+
+  DONE;
+})
+
+(define_expand "<rotate_insn>di3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
-	(rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
+	(any_rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+		       (match_operand:QI 2 "nonmemory_operand" "")))]
  ""
 {
   if (TARGET_64BIT)
-    {
-      ix86_expand_binary_operator (ROTATE, DImode, operands);
-      DONE;
-    }
-  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    ix86_expand_binary_operator (<CODE>, DImode, operands);
+  else if (const_1_to_31_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>di3_doubleword
+		(operands[0], operands[1], operands[2]));
+  else
     FAIL;
-  emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-;; Implement rotation using two double-precision shift instructions
-;; and a scratch register.
-(define_insn_and_split "ix86_rotldi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
-       (rotate:DI (match_operand:DI 1 "register_operand" "0")
-                  (match_operand:QI 2 "const_1_to_31_operand" "I")))
+
+  DONE;
+})
+
+(define_expand "<rotate_insn><mode>3"
+  [(set (match_operand:SWIM124 0 "nonimmediate_operand" "")
+	(any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand" "")
+			    (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn_and_split "*<rotate_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
+	  (subreg:QI
+	    (and:SI
+	      (match_operand:SI 2 "nonimmediate_operand" "c")
+	      (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (any_rotate:SWI48 (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (can_create_pseudo_p ())
+    operands [2] = force_reg (SImode, operands[2]);
+
+  operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+}
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "<MODE>")])
+
+;; Implement rotation using two double-precision
+;; shift instructions and a scratch register.
+
+(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+		     (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
   (clobber (reg:CC FLAGS_REG))
-  (clobber (match_scratch:SI 3 "=&r"))]
- "!TARGET_64BIT"
+  (clobber (match_scratch:DWIH 3 "=&r"))]
  ""
- "&& reload_completed"
+ "#"
+ "reload_completed"
  [(set (match_dup 3) (match_dup 4))
   (parallel
    [(set (match_dup 4)
-         (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
-                 (lshiftrt:SI (match_dup 5)
-                              (minus:QI (const_int 32) (match_dup 2)))))
+	 (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
+		   (lshiftrt:DWIH (match_dup 5)
+				  (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 5)
-         (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
-                 (lshiftrt:SI (match_dup 3)
-                              (minus:QI (const_int 32) (match_dup 2)))))
+	 (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
+		   (lshiftrt:DWIH (match_dup 3)
+				  (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 1, &operands[4], &operands[5]);")
-
-(define_insn "*rotlsi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		   (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, DImode, operands)"
-  "rol{q}\t%0"
+{
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+		       (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:DWIH 3 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+	 (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+		   (ashift:DWIH (match_dup 5)
+				(minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+	 (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+		   (ashift:DWIH (match_dup 3)
+				(minus:QI (match_dup 6) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
+  else
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*rotldi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-	(rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-		   (match_operand:QI 2 "nonmemory_operand" "e,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)"
-  "@
-   rol{q}\t{%2, %0|%0, %2}
-   rol{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "DI")])
-
-(define_expand "rotlsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
-
-(define_insn "*rotlsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		   (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "rol{l}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1_one_bit_zext"
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (rotate:SI (match_operand:SI 1 "register_operand" "0")
-		     (match_operand:QI 2 "const1_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "rol{l}\t%k0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "@
-   rol{l}\t{%2, %0|%0, %2}
-   rol{l}\t{%b2, %0|%0, %b2}"
+	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
+			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+    if (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{l}\t%k0";
+  else
+    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+}
   [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI
-	  (rotate:SI (match_operand:SI 1 "register_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "@
-   rol{l}\t{%2, %k0|%k0, %2}
-   rol{l}\t{%b2, %k0|%k0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_expand "rotlhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
-
-(define_insn "*rotlhi3_1_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		   (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, HImode, operands)"
-  "rol{w}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*rotlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, HImode, operands)"
-  "@
-   rol{w}\t{%2, %0|%0, %2}
-   rol{w}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "HI")])
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>qi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(any_rotate:QI (match_dup 0)
+		       (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+	&& TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{b}\t%0";
+  else
+    return "<rotate>{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "rotate1")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
 
 (define_split
  [(set (match_operand:HI 0 "register_operand" "")
-       (rotate:HI (match_dup 0) (const_int 8)))
+       (any_rotate:HI (match_dup 0) (const_int 8)))
   (clobber (reg:CC FLAGS_REG))]
- "reload_completed"
+ "reload_completed
+  && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
  [(parallel [(set (strict_low_part (match_dup 0))
 		  (bswap:HI (match_dup 0)))
-	     (clobber (reg:CC FLAGS_REG))])]
- "")
-
-(define_expand "rotlqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
-
-(define_insn "*rotlqi3_1_one_bit_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-	(rotate:QI (match_dup 0)
-		   (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
-  "rol{b}\t%0"
-  [(set_attr "type" "rotate1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		   (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, QImode, operands)"
-  "rol{b}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-	(rotate:QI (match_dup 0)
-		   (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   rol{b}\t{%1, %0|%0, %1}
-   rol{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "rotate1")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, QImode, operands)"
-  "@
-   rol{b}\t{%2, %0|%0, %2}
-   rol{b}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "QI")])
-
-(define_expand "rotrdi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-	(rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-		   (match_operand:QI 2 "nonmemory_operand" "")))]
- ""
-{
-  if (TARGET_64BIT)
-    {
-      ix86_expand_binary_operator (ROTATERT, DImode, operands);
-      DONE;
-    }
-  if (!const_1_to_31_operand (operands[2], VOIDmode))
-    FAIL;
-  emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-;; Implement rotation using two double-precision shift instructions
-;; and a scratch register.
-(define_insn_and_split "ix86_rotrdi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
-       (rotatert:DI (match_operand:DI 1 "register_operand" "0")
-                    (match_operand:QI 2 "const_1_to_31_operand" "I")))
-  (clobber (reg:CC FLAGS_REG))
-  (clobber (match_scratch:SI 3 "=&r"))]
- "!TARGET_64BIT"
- ""
- "&& reload_completed"
- [(set (match_dup 3) (match_dup 4))
-  (parallel
-   [(set (match_dup 4)
-         (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
-                 (ashift:SI (match_dup 5)
-                            (minus:QI (const_int 32) (match_dup 2)))))
-    (clobber (reg:CC FLAGS_REG))])
-  (parallel
-   [(set (match_dup 5)
-         (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
-                 (ashift:SI (match_dup 3)
-                            (minus:QI (const_int 32) (match_dup 2)))))
-    (clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 1, &operands[4], &operands[5]);")
-
-(define_insn "*rotrdi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
-  "ror{q}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*rotrdi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-	(rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
-  "@
-   ror{q}\t{%2, %0|%0, %2}
-   ror{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "DI")])
-
-(define_expand "rotrsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
-
-(define_insn "*rotrsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "ror{l}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotrsi3_1_one_bit_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
-	  (rotatert:SI (match_operand:SI 1 "register_operand" "0")
-		       (match_operand:QI 2 "const1_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "ror{l}\t%k0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotrsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "@
-   ror{l}\t{%2, %0|%0, %2}
-   ror{l}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotrsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI
-	  (rotatert:SI (match_operand:SI 1 "register_operand" "0,0")
-		       (match_operand:QI 2 "nonmemory_operand" "I,c"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "@
-   ror{l}\t{%2, %k0|%k0, %2}
-   ror{l}\t{%b2, %k0|%k0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_expand "rotrhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
-
-(define_insn "*rotrhi3_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
-  "ror{w}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*rotrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, HImode, operands)"
-  "@
-   ror{w}\t{%2, %0|%0, %2}
-   ror{w}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "HI")])
-
-(define_split
- [(set (match_operand:HI 0 "register_operand" "")
-       (rotatert:HI (match_dup 0) (const_int 8)))
-  (clobber (reg:CC FLAGS_REG))]
- "reload_completed"
- [(parallel [(set (strict_low_part (match_dup 0))
-		  (bswap:HI (match_dup 0)))
-	     (clobber (reg:CC FLAGS_REG))])]
- "")
-
-(define_expand "rotrqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
-		     (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
-
-(define_insn "*rotrqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-		     (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
-  "ror{b}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1_one_bit_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-	(rotatert:QI (match_dup 0)
-		     (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
-  "ror{b}\t%0"
-  [(set_attr "type" "rotate1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, QImode, operands)"
-  "@
-   ror{b}\t{%2, %0|%0, %2}
-   ror{b}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-	(rotatert:QI (match_dup 0)
-		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   ror{b}\t{%1, %0|%0, %1}
-   ror{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "rotate1")
-   (set_attr "mode" "QI")])
+	     (clobber (reg:CC FLAGS_REG))])])
 
 ;; Bit set / bit test instructions
 
@@ -12532,6 +10363,8 @@
         (match_operand 3 "register_operand" ""))]
   ""
 {
+  rtx (*gen_mov_insv_1) (rtx, rtx);
+
   /* Handle insertions to %ah et al.  */
   if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
     FAIL;
@@ -12541,11 +10374,10 @@
   if (! ext_register_operand (operands[0], VOIDmode))
     FAIL;
 
-  if (TARGET_64BIT)
-    emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3]));
-  else
-    emit_insn (gen_movsi_insv_1 (operands[0], operands[3]));
-
+  gen_mov_insv_1 = (TARGET_64BIT
+		    ? gen_movdi_insv_1 : gen_movsi_insv_1);
+
+  emit_insn (gen_mov_insv_1 (operands[0], operands[3]));
   DONE;
 })
 
@@ -12697,33 +10529,19 @@
   DONE;
 })
 
-(define_insn "*btdi_rex64"
+(define_insn "*bt<mode>"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
-	  (zero_extract:DI
-	    (match_operand:DI 0 "register_operand" "r")
+	  (zero_extract:SWI48
+	    (match_operand:SWI48 0 "register_operand" "r")
 	    (const_int 1)
-	    (match_operand:DI 1 "nonmemory_operand" "rN"))
+	    (match_operand:SWI48 1 "nonmemory_operand" "rN"))
 	  (const_int 0)))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
-  "bt{q}\t{%1, %0|%0, %1}"
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "*btsi"
-  [(set (reg:CCC FLAGS_REG)
-	(compare:CCC
-	  (zero_extract:SI
-	    (match_operand:SI 0 "register_operand" "r")
-	    (const_int 1)
-	    (match_operand:SI 1 "nonmemory_operand" "rN"))
-	  (const_int 0)))]
-  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
-  "bt{l}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 ;; Store-flag instructions.
 
@@ -12809,9 +10627,7 @@
 	    (const_int 0)))]
   ""
   [(set (match_dup 0) (match_dup 1))]
-{
-  PUT_MODE (operands[1], QImode);
-})
+  "PUT_MODE (operands[1], QImode);")
 
 (define_split
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
@@ -12820,9 +10636,7 @@
 	    (const_int 0)))]
   ""
   [(set (match_dup 0) (match_dup 1))]
-{
-  PUT_MODE (operands[1], QImode);
-})
+  "PUT_MODE (operands[1], QImode);")
 
 (define_split
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
@@ -12951,9 +10765,7 @@
 	(if_then_else (match_dup 0)
 		      (label_ref (match_dup 1))
 		      (pc)))]
-{
-  PUT_MODE (operands[0], VOIDmode);
-})
+  "PUT_MODE (operands[0], VOIDmode);")
 
 (define_split
   [(set (pc)
@@ -12980,82 +10792,16 @@
     FAIL;
 })
 
-;; zero_extend in SImode is correct, since this is what combine pass
-;; generates from shift insn with QImode operand.  Actually, the mode of
-;; operand 2 (bit offset operand) doesn't matter since bt insn takes
+;; zero_extend in SImode is correct also for DImode, since this is what combine
+;; pass generates from shift insn with QImode operand.  Actually, the mode
+;; of operand 2 (bit offset operand) doesn't matter since bt insn takes
 ;; appropriate modulo of the bit offset value.
 
-(define_insn_and_split "*jcc_btdi_rex64"
+(define_insn_and_split "*jcc_bt<mode>"
   [(set (pc)
   	(if_then_else (match_operator 0 "bt_comparison_operator"
-			[(zero_extract:DI
-			   (match_operand:DI 1 "register_operand" "r")
-			   (const_int 1)
-			   (zero_extend:SI
-			     (match_operand:QI 2 "register_operand" "r")))
-			 (const_int 0)])
-		      (label_ref (match_operand 3 "" ""))
-		      (pc)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
-  "#"
-  "&& 1"
-  [(set (reg:CCC FLAGS_REG)
-	(compare:CCC
-	  (zero_extract:DI
-	    (match_dup 1)
-	    (const_int 1)
-	    (match_dup 2))
-	  (const_int 0)))
-   (set (pc)
-	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
-		      (label_ref (match_dup 3))
-		      (pc)))]
-{
-  operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0);
-
-  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
-})
-
-;; avoid useless masking of bit offset operand
-(define_insn_and_split "*jcc_btdi_mask_rex64"
-  [(set (pc)
-  	(if_then_else (match_operator 0 "bt_comparison_operator"
-			[(zero_extract:DI
-			   (match_operand:DI 1 "register_operand" "r")
-			   (const_int 1)
-			   (and:SI
-			     (match_operand:SI 2 "register_operand" "r")
-			     (match_operand:SI 3 "const_int_operand" "n")))])
-		      (label_ref (match_operand 4 "" ""))
-		      (pc)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))
-   && (INTVAL (operands[3]) & 0x3f) == 0x3f"
-  "#"
-  "&& 1"
-  [(set (reg:CCC FLAGS_REG)
-	(compare:CCC
-	  (zero_extract:DI
-	    (match_dup 1)
-	    (const_int 1)
-	    (match_dup 2))
-	  (const_int 0)))
-   (set (pc)
-	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
-		      (label_ref (match_dup 4))
-		      (pc)))]
-{
-  operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0);
-
-  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
-})
-
-(define_insn_and_split "*jcc_btsi"
-  [(set (pc)
-  	(if_then_else (match_operator 0 "bt_comparison_operator"
-			[(zero_extract:SI
-			   (match_operand:SI 1 "register_operand" "r")
+			[(zero_extract:SWI48
+			   (match_operand:SWI48 1 "register_operand" "r")
 			   (const_int 1)
 			   (zero_extend:SI
 			     (match_operand:QI 2 "register_operand" "r")))
@@ -13068,7 +10814,7 @@
   "&& 1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
-	  (zero_extract:SI
+	  (zero_extract:SWI48
 	    (match_dup 1)
 	    (const_int 1)
 	    (match_dup 2))
@@ -13078,17 +10824,18 @@
 		      (label_ref (match_dup 3))
 		      (pc)))]
 {
-  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
 
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
-;; avoid useless masking of bit offset operand
-(define_insn_and_split "*jcc_btsi_mask"
+;; Avoid useless masking of bit offset operand.  "and" in SImode is correct
+;; also for DImode, this is what combine produces.
+(define_insn_and_split "*jcc_bt<mode>_mask"
   [(set (pc)
   	(if_then_else (match_operator 0 "bt_comparison_operator"
-			[(zero_extract:SI
-			   (match_operand:SI 1 "register_operand" "r")
+			[(zero_extract:SWI48
+			   (match_operand:SWI48 1 "register_operand" "r")
 			   (const_int 1)
 			   (and:SI
 			     (match_operand:SI 2 "register_operand" "r")
@@ -13097,12 +10844,13 @@
 		      (pc)))
    (clobber (reg:CC FLAGS_REG))]
   "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
-   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
   "#"
   "&& 1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
-	  (zero_extract:SI
+	  (zero_extract:SWI48
 	    (match_dup 1)
 	    (const_int 1)
 	    (match_dup 2))
@@ -13111,7 +10859,11 @@
 	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
 		      (label_ref (match_dup 4))
 		      (pc)))]
-  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
 
 (define_insn_and_split "*jcc_btsi_1"
   [(set (pc)
@@ -13182,7 +10934,7 @@
 ;; Define combination compare-and-branch fp compare instructions to help
 ;; combine.
 
-(define_insn "*fp_jcc_3_387"
+(define_insn "*fp_jcc_1_387"
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
 			[(match_operand 1 "register_operand" "f")
@@ -13200,7 +10952,7 @@
    && !TARGET_CMOVE"
   "#")
 
-(define_insn "*fp_jcc_4_387"
+(define_insn "*fp_jcc_1r_387"
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
 			[(match_operand 1 "register_operand" "f")
@@ -13218,7 +10970,7 @@
    && !TARGET_CMOVE"
   "#")
 
-(define_insn "*fp_jcc_5_387"
+(define_insn "*fp_jcc_2_387"
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
 			[(match_operand 1 "register_operand" "f")
@@ -13233,7 +10985,7 @@
    && !TARGET_CMOVE"
   "#")
 
-(define_insn "*fp_jcc_6_387"
+(define_insn "*fp_jcc_2r_387"
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
 			[(match_operand 1 "register_operand" "f")
@@ -13248,7 +11000,7 @@
    && !TARGET_CMOVE"
   "#")
 
-(define_insn "*fp_jcc_7_387"
+(define_insn "*fp_jcc_3_387"
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
 			[(match_operand 1 "register_operand" "f")
@@ -13265,29 +11017,6 @@
    && !TARGET_CMOVE"
   "#")
 
-;; The order of operands in *fp_jcc_8_387 is forced by combine in
-;; simplify_comparison () function. Float operator is treated as RTX_OBJ
-;; with a precedence over other operators and is always put in the first
-;; place. Swap condition and operands to match ficom instruction.
-
-(define_insn "*fp_jcc_8<mode>_387"
-  [(set (pc)
-	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
-			[(match_operator 1 "float_operator"
-			   [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
-			   (match_operand 3 "register_operand" "f,f")])
-	  (label_ref (match_operand 4 "" ""))
-	  (pc)))
-   (clobber (reg:CCFP FPSR_REG))
-   (clobber (reg:CCFP FLAGS_REG))
-   (clobber (match_scratch:HI 5 "=a,a"))]
-  "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
-   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
-   && GET_MODE (operands[1]) == GET_MODE (operands[3])
-   && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
-   && !TARGET_CMOVE"
-  "#")
-
 (define_split
   [(set (pc)
 	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
@@ -13323,12 +11052,37 @@
   DONE;
 })
 
+;; The order of operands in *fp_jcc_4_387 is forced by combine in
+;; simplify_comparison () function. Float operator is treated as RTX_OBJ
+;; with a precedence over other operators and is always put in the first
+;; place. Swap condition and operands to match ficom instruction.
+
+(define_insn "*fp_jcc_4_<mode>_387"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 0 "ix86_swapped_fp_comparison_operator"
+	    [(match_operator 1 "float_operator"
+	      [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
+	     (match_operand 3 "register_operand" "f,f")])
+	  (label_ref (match_operand 4 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 5 "=a,a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[1]) == GET_MODE (operands[3])
+   && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
+   && !TARGET_CMOVE"
+  "#")
+
 (define_split
   [(set (pc)
-	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
-			[(match_operator 1 "float_operator"
-			   [(match_operand:X87MODEI12 2 "memory_operand" "")])
-			   (match_operand 3 "register_operand" "")])
+	(if_then_else
+	  (match_operator 0 "ix86_swapped_fp_comparison_operator"
+	    [(match_operator 1 "float_operator"
+	      [(match_operand:X87MODEI12 2 "memory_operand" "")])
+	     (match_operand 3 "register_operand" "")])
 	  (match_operand 4 "" "")
 	  (match_operand 5 "" "")))
    (clobber (reg:CCFP FPSR_REG))
@@ -13338,6 +11092,7 @@
   [(const_int 0)]
 {
   operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+
   ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
 			operands[3], operands[7],
 			operands[4], operands[5], operands[6], NULL_RTX);
@@ -13347,10 +11102,11 @@
 ;; %%% Kill this when reload knows how to do it.
 (define_split
   [(set (pc)
-	(if_then_else (match_operator 0 "ix86_fp_comparison_operator"
-			[(match_operator 1 "float_operator"
-			   [(match_operand:X87MODEI12 2 "register_operand" "")])
-			   (match_operand 3 "register_operand" "")])
+	(if_then_else
+	  (match_operator 0 "ix86_swapped_fp_comparison_operator"
+	    [(match_operator 1 "float_operator"
+	      [(match_operand:X87MODEI12 2 "register_operand" "")])
+	     (match_operand 3 "register_operand" "")])
 	  (match_operand 4 "" "")
 	  (match_operand 5 "" "")))
    (clobber (reg:CCFP FPSR_REG))
@@ -13361,6 +11117,7 @@
 {
   operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
   operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+
   ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
 			operands[3], operands[7],
 			operands[4], operands[5], operands[6], operands[2]);
@@ -13510,6 +11267,22 @@
   DONE;
 })
 
+(define_insn_and_split "*call_pop_0_vzeroupper"
+  [(parallel
+    [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+	   (match_operand:SI 1 "" ""))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 2 "immediate_operand" "")))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_0"
   [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
 	 (match_operand:SI 1 "" ""))
@@ -13525,6 +11298,22 @@
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_pop_1_vzeroupper"
+  [(parallel
+    [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+	   (match_operand:SI 1 "" ""))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 2 "immediate_operand" "i")))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
 	 (match_operand:SI 1 "" ""))
@@ -13539,6 +11328,22 @@
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_pop_1_vzeroupper"
+ [(parallel
+   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+	   (match_operand:SI 1 "" ""))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 2 "immediate_operand" "i,i")))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_pop_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand:SI 1 "" ""))
@@ -13571,48 +11376,74 @@
   DONE;
 })
 
-(define_expand "sibcall_pop"
-  [(parallel [(call (match_operand:QI 0 "" "")
-		    (match_operand:SI 1 "" ""))
-	      (set (reg:SI SP_REG)
-		   (plus:SI (reg:SI SP_REG)
-			    (match_operand:SI 3 "" "")))])]
-  ""
-{
-  ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 1);
-  DONE;
-})
+(define_insn_and_split "*call_0_vzeroupper"
+  [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
 
 (define_insn "*call_0"
   [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
 	 (match_operand 1 "" ""))]
   ""
-{
-  if (SIBLING_CALL_P (insn))
-    return "jmp\t%P0";
-  else
-    return "call\t%P0";
-}
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*call_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
 	 (match_operand 1 "" ""))]
   "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (constant_call_address_operand (operands[0], Pmode))
-    return "call\t%P0";
-  return "call\t%A0";
-}
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*sibcall_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*sibcall_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand 1 "" ""))]
   "!TARGET_64BIT && SIBLING_CALL_P (insn)"
-  "@
-   jmp\t%P0
-   jmp\t%A0"
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*call_1_rex64"
@@ -13620,11 +11451,33 @@
 	 (match_operand 1 "" ""))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)
    && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
-{
-  if (constant_call_address_operand (operands[0], Pmode))
-    return "call\t%P0";
-  return "call\t%A0";
-}
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	   (match_operand 1 "" ""))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*call_1_rex64_ms_sysv"
@@ -13644,27 +11497,45 @@
    (clobber (reg:DI SI_REG))
    (clobber (reg:DI DI_REG))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (constant_call_address_operand (operands[0], Pmode))
-    return "call\t%P0";
-  return "call\t%A0";
-}
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_large_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*call_1_rex64_large"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
 	 (match_operand 1 "" ""))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)"
-  "call\t%A0"
+  { return ix86_output_call_insn (insn, operands[0], 0); }
+  [(set_attr "type" "call")])
+
+(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
   [(set_attr "type" "call")])
 
 (define_insn "*sibcall_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand 1 "" ""))]
   "TARGET_64BIT && SIBLING_CALL_P (insn)"
-  "@
-   jmp\t%P0
-   jmp\t%A0"
+  { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
 ;; Call subroutine, returning value in operand 0
@@ -13870,13 +11741,22 @@
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
-(define_insn "vswapmov"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (match_operand:SI 1 "register_operand" "r"))
-   (unspec_volatile [(const_int 0)] UNSPECV_VSWAPMOV)]
-  ""
-  "movl.s\t{%1, %0|%0, %1}"
-  [(set_attr "length" "2")
+;; Generate nops.  Operand 0 is the number of nops, up to 8.
+(define_insn "nops"
+  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
+		    UNSPECV_NOPS)]
+  "reload_completed"
+{
+  int num = INTVAL (operands[0]);
+
+  gcc_assert (num >= 1 && num <= 8);
+
+  while (num--)
+    fputs ("\tnop\n", asm_out_file);
+
+  return "";
+}
+  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
@@ -13910,7 +11790,7 @@
 	(unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  { return output_set_got (operands[0], NULL_RTX); }
+  "* return output_set_got (operands[0], NULL_RTX);"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
@@ -13920,7 +11800,7 @@
 	 UNSPEC_SET_GOT))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  { return output_set_got (operands[0], operands[1]); }
+  "* return output_set_got (operands[0], operands[1]);"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
@@ -14007,38 +11887,91 @@
   "leave"
   [(set_attr "type" "leave")])
 
-(define_expand "ffssi2"
-  [(parallel
-     [(set (match_operand:SI 0 "register_operand" "")
-	   (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
-      (clobber (match_scratch:SI 2 ""))
-      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  if (TARGET_CMOVE)
-    {
-      emit_insn (gen_ffs_cmove (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_expand "ffs_cmove"
+;; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+  [(const_int 0)]
+  ""
+{
+  ix86_expand_split_stack_prologue ();
+  DONE;
+})
+
+;; In order to support the call/return predictor, we use a return
+;; instruction which the middle-end doesn't see.
+(define_insn "split_stack_return"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+		     UNSPECV_SPLIT_STACK_RETURN)]
+  ""
+{
+  if (operands[0] == const0_rtx)
+    return "ret";
+  else
+    return "ret\t%0";
+}
+  [(set_attr "atom_unit" "jeu")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+	(if_then_else (match_operand:SI 0 "const0_operand" "")
+		      (const_int 1)
+		      (const_int 3)))
+   (set (attr "length_immediate")
+	(if_then_else (match_operand:SI 0 "const0_operand" "")
+		      (const_int 0)
+		      (const_int 2)))])
+
+;; If there are operand 0 bytes available on the stack, jump to
+;; operand 1.
+
+(define_expand "split_stack_space_check"
+  [(set (pc) (if_then_else
+	      (ltu (minus (reg SP_REG)
+			  (match_operand 0 "register_operand" ""))
+		   (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+	      (label_ref (match_operand 1 "" ""))
+	      (pc)))]
+  ""
+{
+  rtx reg, size, limit;
+
+  reg = gen_reg_rtx (Pmode);
+  size = force_reg (Pmode, operands[0]);
+  emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size));
+  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+			  UNSPEC_STACK_CHECK);
+  limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit));
+  ix86_expand_branch (GEU, reg, limit, operands[1]);
+
+  DONE;
+})
+
+;; Bit manipulation instructions.
+
+(define_expand "ffs<mode>2"
   [(set (match_dup 2) (const_int -1))
    (parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "")
-				(const_int 0)))
-	      (set (match_operand:SI 0 "register_operand" "")
-		   (ctz:SI (match_dup 1)))])
-   (set (match_dup 0) (if_then_else:SI
+		   (compare:CCZ
+		     (match_operand:SWI48 1 "nonimmediate_operand" "")
+		     (const_int 0)))
+	      (set (match_operand:SWI48 0 "register_operand" "")
+		   (ctz:SWI48 (match_dup 1)))])
+   (set (match_dup 0) (if_then_else:SWI48
 			(eq (reg:CCZ FLAGS_REG) (const_int 0))
 			(match_dup 2)
 			(match_dup 0)))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_CMOVE"
-  "operands[2] = gen_reg_rtx (SImode);")
-
-(define_insn_and_split "*ffs_no_cmove"
+   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (<MODE>mode == SImode && !TARGET_CMOVE)
+    {
+      emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
+      DONE;
+    }
+  operands[2] = gen_reg_rtx (<MODE>mode);
+})
+
+(define_insn_and_split "ffssi2_no_cmove"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
    (clobber (match_scratch:SI 2 "=&q"))
@@ -14062,93 +11995,274 @@
   ix86_expand_clear (operands[2]);
 })
 
-(define_insn "*ffssi_1"
+;; BSF with the ZF result exposed in FLAGS_REG: lets the ffs expander above
+;; use one instruction for both the compare-against-zero and the bit scan.
+(define_insn "*ffs<mode>_1"
   [(set (reg:CCZ FLAGS_REG)
-	(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
+	(compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
 		     (const_int 0)))
-   (set (match_operand:SI 0 "register_operand" "=r")
-	(ctz:SI (match_dup 1)))]
-  ""
-  "bsf{l}\t{%1, %0|%0, %1}"
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+	(ctz:SWI48 (match_dup 1)))]
+  ""
+  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
-
-(define_expand "ffsdi2"
-  [(set (match_dup 2) (const_int -1))
-   (parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "")
-				(const_int 0)))
-	      (set (match_operand:DI 0 "register_operand" "")
-		   (ctz:DI (match_dup 1)))])
-   (set (match_dup 0) (if_then_else:DI
-			(eq (reg:CCZ FLAGS_REG) (const_int 0))
-			(match_dup 2)
-			(match_dup 0)))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT"
-  "operands[2] = gen_reg_rtx (DImode);")
-
-(define_insn "*ffsdi_1"
-  [(set (reg:CCZ FLAGS_REG)
-	(compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
-		     (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(ctz:DI (match_dup 1)))]
-  "TARGET_64BIT"
-  "bsf{q}\t{%1, %0|%0, %1}"
+   (set_attr "mode" "<MODE>")])
+
+;; Count trailing zeros: TZCNT when BMI is available, plain BSF otherwise.
+;; The prefix_rep attribute tracks the TZCNT encoding (REP-prefixed BSF).
+(define_insn "ctz<mode>2"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+{
+  if (TARGET_BMI)
+    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+  else
+    return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "ctzsi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "bsf{l}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
-
-(define_insn "ctzdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "bsf{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_expand "clzsi2"
+   (set (attr "prefix_rep") (symbol_ref "TARGET_BMI"))
+   (set_attr "mode" "<MODE>")])
+
+;; clz via BSR: BSR yields (bitsize-1) - clz, which the follow-up XOR with
+;; bitsize-1 converts back to clz.  With ABM, emit LZCNT directly instead.
+(define_expand "clz<mode>2"
   [(parallel
-     [(set (match_operand:SI 0 "register_operand" "")
-	   (minus:SI (const_int 31)
-		     (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
+     [(set (match_operand:SWI248 0 "register_operand" "")
+	   (minus:SWI248
+	     (match_dup 2)
+	     (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" ""))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
-     [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
+     [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_ABM)
    {
-      emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+      emit_insn (gen_clz<mode>2_abm (operands[0], operands[1]));
      DONE;
    }
-})
-
-(define_insn "clzsi2_abm"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ABM"
-  "lzcnt{l}\t{%1, %0|%0, %1}"
+  operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+})
+
+;; Direct count-leading-zeros via LZCNT, available with ABM or BMI.
+(define_insn "clz<mode>2_abm"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM || TARGET_BMI"
+  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
+
+;; BMI instructions.
+;; ANDN: op0 = ~op1 & op2 (three-operand and-not).
+(define_insn "*bmi_andn_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (not:SWI48
+            (match_operand:SWI48 1 "register_operand" "r"))
+            (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "andn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BEXTR: bit-field extract from op1; op2 is the register holding the
+;; start/length control word.  Kept opaque as an unspec.
+(define_insn "bmi_bextr_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_BEXTR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "bextr\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLSI: op0 = x & -x (isolate lowest set bit).
+(define_insn "*bmi_blsi_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (neg:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "blsi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLSMSK: op0 = x ^ (x - 1) (mask up to and including lowest set bit).
+(define_insn "*bmi_blsmsk_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (xor:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "blsmsk\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLSR: op0 = x & (x - 1) (reset lowest set bit).
+(define_insn "*bmi_blsr_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_BMI"
+   "blsr\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; TBM instructions.
+;; TBM BEXTR with immediate control: operand 2 is the field length and
+;; operand 3 the start position; they are packed into the single
+;; instruction immediate as (length << 8) | start before output.
+(define_insn "tbm_bextri_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (zero_extract:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+          (match_operand:SWI48 2 "const_0_to_255_operand" "n")
+          (match_operand:SWI48 3 "const_0_to_255_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
+  return "bextr\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLCFILL: op0 = x & (x + 1).
+(define_insn "*tbm_blcfill_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int 1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blcfill\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLCI: op0 = x | ~(x + 1).
+(define_insn "*tbm_blci_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ior:SWI48
+          (not:SWI48
+            (plus:SWI48
+              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+              (const_int 1)))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blci\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLCIC: op0 = (x + 1) & ~x (isolate lowest clear bit).
+(define_insn "*tbm_blcic_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int 1))
+          (not:SWI48
+            (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blcic\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLCMSK: op0 = x ^ (x + 1).
+(define_insn "*tbm_blcmsk_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (xor:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int 1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blcmsk\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLCS: op0 = x | (x + 1) (set lowest clear bit).
+(define_insn "*tbm_blcs_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ior:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int 1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blcs\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLSFILL: op0 = x | (x - 1).
+(define_insn "*tbm_blsfill_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ior:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blsfill\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; BLSIC: op0 = (x - 1) | ~x.
+(define_insn "*tbm_blsic_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ior:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (not:SWI48
+            (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "blsic\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; T1MSKC: op0 = (x + 1) | ~x.
+(define_insn "*tbm_t1mskc_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ior:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int 1))
+          (not:SWI48
+            (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "t1mskc\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; TZMSK: op0 = (x - 1) & ~x (mask of the trailing zero bits).
+(define_insn "*tbm_tzmsk_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (not:SWI48
+            (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_TBM"
+   "tzmsk\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+;; 64-bit BSR: index of the highest set bit, expressed as 63 - clz.
+(define_insn "bsr_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI (const_int 63)
+		  (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "bsr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
 
 (define_insn "bsr"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -14161,6 +12275,17 @@
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")])
 
+;; 16-bit BSR: highest set bit index as 15 - clz.
+(define_insn "*bsrhi"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(minus:HI (const_int 15)
+		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsr{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "popcount<mode>2"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
 	(popcount:SWI248
@@ -14210,19 +12335,19 @@
 #if TARGET_MACHO
   return "popcnt\t{%1, %0|%0, %1}";
 #else
-  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+  return "popcnt{l}\t{%1, %0|%0, %1}";
 #endif
 }
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
    (set_attr "mode" "SI")])
 
-(define_expand "bswapsi2"
-  [(set (match_operand:SI 0 "register_operand" "")
-	(bswap:SI (match_operand:SI 1 "register_operand" "")))]
-  ""
-{
-  if (!(TARGET_BSWAP || TARGET_MOVBE))
+(define_expand "bswap<mode>2"
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(bswap:SWI48 (match_operand:SWI48 1 "register_operand" "")))]
+  ""
+{
+  if (<MODE>mode == SImode && !(TARGET_BSWAP || TARGET_MOVBE))
     {
       rtx x = operands[0];
 
@@ -14234,28 +12359,29 @@
     }
 })
 
-(define_insn "*bswapsi_movbe"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m")
-	(bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))]
-  "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "*bswap<mode>2_movbe"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
+	(bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
+  "TARGET_MOVBE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     bswap\t%0
     movbe\t{%1, %0|%0, %1}
     movbe\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,imov,imov")
-   (set_attr "modrm" "*,1,1")
-   (set_attr "prefix_0f" "1")
+  [(set_attr "type" "bitmanip,imov,imov")
+   (set_attr "modrm" "0,1,1")
+   (set_attr "prefix_0f" "*,1,1")
    (set_attr "prefix_extra" "*,1,1")
-   (set_attr "length" "2,*,*")
-   (set_attr "mode" "SI")])
-
-(define_insn "*bswapsi_1"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(bswap:SI (match_operand:SI 1 "register_operand" "0")))]
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bswap<mode>2_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
   "TARGET_BSWAP"
   "bswap\t%0"
-  [(set_attr "prefix_0f" "1")
-   (set_attr "length" "2")])
+  [(set_attr "type" "bitmanip")
+   (set_attr "modrm" "0")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*bswaphi_lowpart_1"
   [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
@@ -14277,114 +12403,6 @@
   [(set_attr "length" "4")
    (set_attr "mode" "HI")])
 
-(define_expand "bswapdi2"
-  [(set (match_operand:DI 0 "register_operand" "")
-	(bswap:DI (match_operand:DI 1 "register_operand" "")))]
-  "TARGET_64BIT"
-  "")
-
-(define_insn "*bswapdi_movbe"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
-	(bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))]
-  "TARGET_64BIT && TARGET_MOVBE
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-    bswap\t%0
-    movbe\t{%1, %0|%0, %1}
-    movbe\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,imov,imov")
-   (set_attr "modrm" "*,1,1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "prefix_extra" "*,1,1")
-   (set_attr "length" "3,*,*")
-   (set_attr "mode" "DI")])
-
-(define_insn "*bswapdi_1"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(bswap:DI (match_operand:DI 1 "register_operand" "0")))]
-  "TARGET_64BIT"
-  "bswap\t%0"
-  [(set_attr "prefix_0f" "1")
-   (set_attr "length" "3")])
-
-(define_expand "clzdi2"
-  [(parallel
-     [(set (match_operand:DI 0 "register_operand" "")
-	   (minus:DI (const_int 63)
-		     (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
-     [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
-      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_ABM)
-    {
-      emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_insn "clzdi2_abm"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_ABM"
-  "lzcnt{q}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "DI")])
-
-(define_insn "bsr_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(minus:DI (const_int 63)
-		  (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "bsr{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_expand "clzhi2"
-  [(parallel
-     [(set (match_operand:HI 0 "register_operand" "")
-	   (minus:HI (const_int 15)
-		     (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
-     [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
-      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  if (TARGET_ABM)
-    {
-      emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_insn "clzhi2_abm"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ABM"
-  "lzcnt{w}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "HI")])
-
-(define_insn "*bsrhi"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(minus:HI (const_int 15)
-		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "bsr{w}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "HI")])
-
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand" "")
 	(parity:DI (match_operand:DI 1 "register_operand" "")))]
@@ -14413,36 +12431,6 @@
   DONE;
 })
 
-(define_insn_and_split "paritydi2_cmp"
-  [(set (reg:CC FLAGS_REG)
-	(parity:CC (match_operand:DI 3 "register_operand" "0")))
-   (clobber (match_scratch:DI 0 "=r"))
-   (clobber (match_scratch:SI 1 "=&r"))
-   (clobber (match_scratch:HI 2 "=Q"))]
-  "! TARGET_POPCNT"
-  "#"
-  "&& reload_completed"
-  [(parallel
-     [(set (match_dup 1)
-	   (xor:SI (match_dup 1) (match_dup 4)))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
-     [(set (reg:CC FLAGS_REG)
-	   (parity:CC (match_dup 1)))
-      (clobber (match_dup 1))
-      (clobber (match_dup 2))])]
-{
-  operands[4] = gen_lowpart (SImode, operands[3]);
-
-  if (TARGET_64BIT)
-    {
-      emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
-      emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
-    }
-  else
-    operands[1] = gen_highpart (SImode, operands[3]);
-})
-
 (define_expand "paritysi2"
   [(set (match_operand:SI 0 "register_operand" "")
 	(parity:SI (match_operand:SI 1 "register_operand" "")))]
@@ -14462,9 +12450,41 @@
   DONE;
 })
 
+;; DImode parity without POPCNT: XOR the low and high 32-bit halves into
+;; one SImode value (parity is preserved by XOR), then defer to the
+;; SImode parity pattern.  On 64-bit the high half is obtained by a
+;; 32-bit shift; on 32-bit the register pair's highpart is used directly.
+(define_insn_and_split "paritydi2_cmp"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:DI 3 "register_operand" "0")]
+		   UNSPEC_PARITY))
+   (clobber (match_scratch:DI 0 "=r"))
+   (clobber (match_scratch:SI 1 "=&r"))
+   (clobber (match_scratch:HI 2 "=Q"))]
+  "! TARGET_POPCNT"
+  "#"
+  "&& reload_completed"
+  [(parallel
+     [(set (match_dup 1)
+	   (xor:SI (match_dup 1) (match_dup 4)))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (reg:CC FLAGS_REG)
+	   (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
+      (clobber (match_dup 1))
+      (clobber (match_dup 2))])]
+{
+  operands[4] = gen_lowpart (SImode, operands[3]);
+
+  if (TARGET_64BIT)
+    {
+      emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
+      emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
+    }
+  else
+    operands[1] = gen_highpart (SImode, operands[3]);
+})
+
 (define_insn_and_split "paritysi2_cmp"
   [(set (reg:CC FLAGS_REG)
-	(parity:CC (match_operand:SI 2 "register_operand" "0")))
+	(unspec:CC [(match_operand:SI 2 "register_operand" "0")]
+		   UNSPEC_PARITY))
    (clobber (match_scratch:SI 0 "=r"))
    (clobber (match_scratch:HI 1 "=&Q"))]
   "! TARGET_POPCNT"
@@ -14476,7 +12496,7 @@
       (clobber (reg:CC FLAGS_REG))])
    (parallel
      [(set (reg:CC FLAGS_REG)
-	   (parity:CC (match_dup 1)))
+	   (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
       (clobber (match_dup 1))])]
 {
   operands[3] = gen_lowpart (HImode, operands[2]);
@@ -14487,20 +12507,13 @@
 
 (define_insn "*parityhi2_cmp"
   [(set (reg:CC FLAGS_REG)
-	(parity:CC (match_operand:HI 1 "register_operand" "0")))
+	(unspec:CC [(match_operand:HI 1 "register_operand" "0")]
+		   UNSPEC_PARITY))
    (clobber (match_scratch:HI 0 "=Q"))]
   "! TARGET_POPCNT"
   "xor{b}\t{%h0, %b0|%b0, %h0}"
   [(set_attr "length" "2")
    (set_attr "mode" "HI")])
-
-(define_insn "*parityqi2_cmp"
-  [(set (reg:CC FLAGS_REG)
-	(parity:CC (match_operand:QI 0 "register_operand" "q")))]
-  "! TARGET_POPCNT"
-  "test{b}\t%0, %0"
-  [(set_attr "length" "2")
-   (set_attr "mode" "QI")])
 
 ;; Thread-local storage patterns for ELF.
 ;;
@@ -14517,25 +12530,10 @@
    (clobber (match_scratch:SI 5 "=c"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_GNU_TLS"
-  "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3"
+  "lea{l}\t{%a2@tlsgd(,%1,1), %0|%0, %a2@tlsgd[%1*1]}\;call\t%P3"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
-(define_insn "*tls_global_dynamic_32_sun"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
-		    (match_operand:SI 2 "tls_symbolic_operand" "")
-		    (match_operand:SI 3 "call_insn_operand" "")]
-		    UNSPEC_TLS_GD))
-   (clobber (match_scratch:SI 4 "=d"))
-   (clobber (match_scratch:SI 5 "=c"))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_SUN_TLS"
-  "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]}
-	push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop"
-  [(set_attr "type" "multi")
-   (set_attr "length" "14")])
-
 (define_expand "tls_global_dynamic_32"
   [(parallel [(set (match_operand:SI 0 "register_operand" "")
 		   (unspec:SI
@@ -14571,7 +12569,7 @@
    (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
 	      UNSPEC_TLS_GD)]
   "TARGET_64BIT"
-  { return ASM_BYTE "0x66\n\tlea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; }
+  { return ASM_BYTE "0x66\n\tlea{q}\t{%a1@tlsgd(%%rip), %%rdi|rdi, %a1@tlsgd[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; }
   [(set_attr "type" "multi")
    (set_attr "length" "16")])
 
@@ -14600,24 +12598,10 @@
    (clobber (match_scratch:SI 4 "=c"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_GNU_TLS"
-  "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2"
+  "lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}\;call\t%P2"
   [(set_attr "type" "multi")
    (set_attr "length" "11")])
 
-(define_insn "*tls_local_dynamic_base_32_sun"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
-                    (match_operand:SI 2 "call_insn_operand" "")]
-		   UNSPEC_TLS_LD_BASE))
-   (clobber (match_scratch:SI 3 "=d"))
-   (clobber (match_scratch:SI 4 "=c"))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_SUN_TLS"
-  "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]}
-	push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3"
-  [(set_attr "type" "multi")
-   (set_attr "length" "13")])
-
 (define_expand "tls_local_dynamic_base_32"
   [(parallel [(set (match_operand:SI 0 "register_operand" "")
 		   (unspec:SI [(match_dup 1) (match_dup 2)]
@@ -14649,7 +12633,7 @@
 		 (match_operand:DI 2 "" "")))
    (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
   "TARGET_64BIT"
-  "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1"
+  "lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}\;call\t%P1"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
@@ -14690,58 +12674,47 @@
 			      UNSPEC_TLS_GD))
 	      (clobber (match_dup 4))
 	      (clobber (match_dup 5))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; Segment register for the thread base ptr load
+(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
 
 ;; Load and add the thread base pointer from %gs:0.
-
-(define_insn "*load_tp_si"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(const_int 0)] UNSPEC_TP))]
-  "!TARGET_64BIT"
-  "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+;; Load the thread base pointer from <tp_seg>:0 (gs for SImode, fs for
+;; DImode, per the tp_seg mode attribute above).
+(define_insn "*load_tp_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec:P [(const_int 0)] UNSPEC_TP))]
+  ""
+  "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])
 
-(define_insn "*add_tp_si"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
-		 (match_operand:SI 1 "register_operand" "0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT"
-  "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+;; Add the thread base pointer (<tp_seg>:0) into operand 1 in place;
+;; clobbers FLAGS via the ADD.
+(define_insn "*add_tp_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(plus:P (unspec:P [(const_int 0)] UNSPEC_TP)
+		(match_operand:P 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
  [(set_attr "type" "alu")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])
 
-(define_insn "*load_tp_di"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(unspec:DI [(const_int 0)] UNSPEC_TP))]
-  "TARGET_64BIT"
-  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
-
-(define_insn "*add_tp_di"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
-		 (match_operand:DI 1 "register_operand" "0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
-  [(set_attr "type" "alu")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+;; The Sun linker took the AMD64 TLS spec literally and can only handle
+;; %rax as destination of the initial executable code sequence.
+;; Initial-exec access: thread base from %fs:0 plus the @gottpoff offset
+;; of the symbol; the "=a" constraint pins the result to %rax as the Sun
+;; linker requires (see the note above).
+(define_insn "tls_initial_exec_64_sun"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(unspec:DI
+	 [(match_operand:DI 1 "tls_symbolic_operand" "")]
+	 UNSPEC_TLS_IE_SUN))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_SUN_TLS"
+  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}\n\tadd{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"
+  [(set_attr "type" "multi")])
 
 ;; GNU2 TLS patterns can be split.
 
@@ -14870,8 +12843,6 @@
   operands[4] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
   emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
 })
-
-;;
 
 ;; These patterns match the binary 387 instructions for addM3, subM3,
 ;; mulM3 and divM3.  There are three patterns for each of DFmode and
@@ -15368,7 +13339,8 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")])
 
 (define_insn "sqrt_extend<mode>xf2_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -15380,7 +13352,8 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")
-   (set_attr "amdfam10_decode" "direct")])
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")])
 
 (define_insn "*rsqrtsf2_sse"
   [(set (match_operand:SF 0 "register_operand" "=x")
@@ -15414,7 +13387,8 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "*")
-   (set_attr "amdfam10_decode" "*")])
+   (set_attr "amdfam10_decode" "*")
+   (set_attr "bdver1_decode" "*")])
 
 (define_expand "sqrt<mode>2"
   [(set (match_operand:MODEF 0 "register_operand" "")
@@ -15488,6 +13462,8 @@
    (use (match_operand:MODEF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
+  rtx (*gen_truncxf) (rtx, rtx);
+
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
@@ -15504,10 +13480,11 @@
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !TARGET_MIX_SSE_I387)
-    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
-  else
-    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
-
+    gen_truncxf = gen_truncxf<mode>2;
+  else
+    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+  emit_insn (gen_truncxf (operands[0], op1));
   DONE;
 })
 
@@ -15556,6 +13533,8 @@
    (use (match_operand:MODEF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
+  rtx (*gen_truncxf) (rtx, rtx);
+
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
@@ -15573,10 +13552,11 @@
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !TARGET_MIX_SSE_I387)
-    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
-  else
-    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
-
+    gen_truncxf = gen_truncxf<mode>2;
+  else
+    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+  emit_insn (gen_truncxf (operands[0], op1));
   DONE;
 })
 
@@ -15650,8 +13630,7 @@
 	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
    && !(reload_completed || reload_in_progress)"
-  [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]
-  "")
+  [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))])
 
 (define_split
   [(set (match_operand:XF 0 "register_operand" "")
@@ -15661,8 +13640,7 @@
 	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
    && !(reload_completed || reload_in_progress)"
-  [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
-  "")
+  [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))])
 
 (define_insn "sincos_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -15688,8 +13666,8 @@
 	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
    && !(reload_completed || reload_in_progress)"
-  [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]
-  "")
+  [(set (match_dup 1)
+	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))])
 
 (define_split
   [(set (match_operand:XF 0 "register_operand" "")
@@ -15700,8 +13678,8 @@
 	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
    && !(reload_completed || reload_in_progress)"
-  [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]
-  "")
+  [(set (match_dup 0)
+	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))])
 
 (define_expand "sincos<mode>3"
   [(use (match_operand:MODEF 0 "register_operand" ""))
@@ -15817,8 +13795,7 @@
 			      UNSPEC_FPATAN))
 	      (clobber (match_scratch:XF 3 ""))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "")
+   && flag_unsafe_math_optimizations")
 
 (define_expand "atan2<mode>3"
   [(use (match_operand:MODEF 0 "register_operand" ""))
@@ -16169,9 +14146,7 @@
 		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  operands[2] = gen_reg_rtx (XFmode);
-})
+  "operands[2] = gen_reg_rtx (XFmode);")
 
 (define_expand "logb<mode>2"
   [(use (match_operand:MODEF 0 "register_operand" ""))
@@ -16555,9 +14530,7 @@
 		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  operands[2] = gen_reg_rtx (XFmode);
-})
+  "operands[2] = gen_reg_rtx (XFmode);")
 
 (define_expand "significand<mode>2"
   [(use (match_operand:MODEF 0 "register_operand" ""))
@@ -16700,8 +14673,7 @@
   "reload_completed"
   [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
 	      (clobber (match_dup 3))])
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
@@ -16711,8 +14683,7 @@
    (clobber (match_scratch 3 ""))]
   "reload_completed"
   [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
-	      (clobber (match_dup 3))])]
-  "")
+	      (clobber (match_dup 3))])])
 
 (define_insn_and_split "*fist<mode>2_1"
   [(set (match_operand:X87MODEI12 0 "register_operand" "")
@@ -16758,8 +14729,7 @@
    (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
   "reload_completed"
   [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 (define_split
   [(set (match_operand:X87MODEI12 0 "memory_operand" "")
@@ -16767,23 +14737,20 @@
 			   UNSPEC_FIST))
    (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
   "reload_completed"
-  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]
-  "")
+  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))])
 
 (define_expand "lrintxf<mode>2"
   [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
      (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		      UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387"
-  "")
+  "TARGET_USE_FANCY_MATH_387")
 
 (define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
   [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
      (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
 			UNSPEC_FIX_NOTRUNC))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
-   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)"
-  "")
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)")
 
 (define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
   [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
@@ -16964,8 +14931,7 @@
 	      (use (match_dup 2))
 	      (use (match_dup 3))
 	      (clobber (match_dup 5))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
@@ -16979,8 +14945,7 @@
   [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
 	      (use (match_dup 2))
 	      (use (match_dup 3))
-	      (clobber (match_dup 5))])]
-  "")
+	      (clobber (match_dup 5))])])
 
 (define_insn "fist<mode>2_floor"
   [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
@@ -17021,8 +14986,7 @@
 				  UNSPEC_FIST_FLOOR))
 	      (use (match_dup 2))
 	      (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:X87MODEI12 0 "memory_operand" "")
@@ -17035,8 +14999,7 @@
   [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
 				  UNSPEC_FIST_FLOOR))
 	      (use (match_dup 2))
-	      (use (match_dup 3))])]
-  "")
+	      (use (match_dup 3))])])
 
 (define_expand "lfloorxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
@@ -17045,8 +15008,7 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_USE_FANCY_MATH_387
    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "")
+   && flag_unsafe_math_optimizations")
 
 (define_expand "lfloor<MODEF:mode><SWI48:mode>2"
   [(match_operand:SWI48 0 "nonimmediate_operand" "")
@@ -17226,8 +15188,7 @@
 	      (use (match_dup 2))
 	      (use (match_dup 3))
 	      (clobber (match_dup 5))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
@@ -17241,8 +15202,7 @@
   [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
 	      (use (match_dup 2))
 	      (use (match_dup 3))
-	      (clobber (match_dup 5))])]
-  "")
+	      (clobber (match_dup 5))])])
 
 (define_insn "fist<mode>2_ceil"
   [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
@@ -17283,8 +15243,7 @@
 				  UNSPEC_FIST_CEIL))
 	      (use (match_dup 2))
 	      (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
+   (set (match_dup 0) (match_dup 4))])
 
 (define_split
   [(set (match_operand:X87MODEI12 0 "memory_operand" "")
@@ -17297,8 +15256,7 @@
   [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
 				  UNSPEC_FIST_CEIL))
 	      (use (match_dup 2))
-	      (use (match_dup 3))])]
-  "")
+	      (use (match_dup 3))])])
 
 (define_expand "lceilxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
@@ -17307,8 +15265,7 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_USE_FANCY_MATH_387
    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-  "")
+   && flag_unsafe_math_optimizations")
 
 (define_expand "lceil<MODEF:mode><SWI48:mode>2"
   [(match_operand:SWI48 0 "nonimmediate_operand" "")
@@ -17458,7 +15415,6 @@
    && flag_unsafe_math_optimizations"
 {
   emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
-
   DONE;
 })
 
@@ -17578,18 +15534,65 @@
   DONE;
 })
 
-(define_expand "signbit<mode>2"
+(define_expand "signbitxf2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx scratch = gen_reg_rtx (HImode);
+
+  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+  emit_insn (gen_andsi3 (operands[0],
+	     gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+  DONE;
+})
+
+(define_insn "movmsk_df"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:DF 1 "register_operand" "x")]
+	  UNSPEC_MOVMSK))]
+  "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
+  "%vmovmskpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DF")])
+
+;; Use movmskpd in SSE mode to avoid store forwarding stall
+;; for 32bit targets and movq+shrq sequence for 64bit targets.
+(define_expand "signbitdf2"
   [(use (match_operand:SI 0 "register_operand" ""))
-   (use (match_operand:X87MODEF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  rtx mask = GEN_INT (0x0200);
-
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+{
+  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
+    {
+      emit_insn (gen_movmsk_df (operands[0], operands[1]));
+      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
+    }
+  else
+    {
+      rtx scratch = gen_reg_rtx (HImode);
+
+      emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
+      emit_insn (gen_andsi3 (operands[0],
+		 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+    }
+  DONE;
+})
+
+(define_expand "signbitsf2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
+{
   rtx scratch = gen_reg_rtx (HImode);
 
-  emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1]));
-  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask));
+  emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
+  emit_insn (gen_andsi3 (operands[0],
+	     gen_lowpart (SImode, scratch), GEN_INT (0x200)));
   DONE;
 })
 
@@ -17603,11 +15606,11 @@
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
-(define_expand "movmemsi"
+(define_expand "movmem<mode>"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
-   (use (match_operand:SI 2 "nonmemory_operand" ""))
-   (use (match_operand:SI 3 "const_int_operand" ""))
+   (use (match_operand:SWI48 2 "nonmemory_operand" ""))
+   (use (match_operand:SWI48 3 "const_int_operand" ""))
    (use (match_operand:SI 4 "const_int_operand" ""))
    (use (match_operand:SI 5 "const_int_operand" ""))]
   ""
@@ -17619,22 +15622,6 @@
    FAIL;
 })
 
-(define_expand "movmemdi"
-  [(use (match_operand:BLK 0 "memory_operand" ""))
-   (use (match_operand:BLK 1 "memory_operand" ""))
-   (use (match_operand:DI 2 "nonmemory_operand" ""))
-   (use (match_operand:DI 3 "const_int_operand" ""))
-   (use (match_operand:SI 4 "const_int_operand" ""))
-   (use (match_operand:SI 5 "const_int_operand" ""))]
-  "TARGET_64BIT"
-{
- if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
-			 operands[4], operands[5]))
-   DONE;
- else
-   FAIL;
-})
-
 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
 
@@ -17689,98 +15676,57 @@
   "TARGET_64BIT"
   "movsq"
   [(set_attr "type" "str")
-   (set_attr "mode" "DI")
-   (set_attr "memory" "both")])
+   (set_attr "memory" "both")
+   (set_attr "mode" "DI")])
 
 (define_insn "*strmovsi_1"
-  [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
-	(mem:SI (match_operand:SI 3 "register_operand" "1")))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 2)
-		 (const_int 4)))
-   (set (match_operand:SI 1 "register_operand" "=S")
-	(plus:SI (match_dup 3)
-		 (const_int 4)))]
-  "!TARGET_64BIT"
+  [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
+	(mem:SI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 2)
+		(const_int 4)))
+   (set (match_operand:P 1 "register_operand" "=S")
+	(plus:P (match_dup 3)
+		(const_int 4)))]
+  ""
   "movs{l|d}"
   [(set_attr "type" "str")
-   (set_attr "mode" "SI")
-   (set_attr "memory" "both")])
-
-(define_insn "*strmovsi_rex_1"
-  [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
-	(mem:SI (match_operand:DI 3 "register_operand" "1")))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 2)
-		 (const_int 4)))
-   (set (match_operand:DI 1 "register_operand" "=S")
-	(plus:DI (match_dup 3)
-		 (const_int 4)))]
-  "TARGET_64BIT"
-  "movs{l|d}"
-  [(set_attr "type" "str")
-   (set_attr "mode" "SI")
-   (set_attr "memory" "both")])
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
 
 (define_insn "*strmovhi_1"
-  [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
-	(mem:HI (match_operand:SI 3 "register_operand" "1")))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 2)
-		 (const_int 2)))
-   (set (match_operand:SI 1 "register_operand" "=S")
-	(plus:SI (match_dup 3)
-		 (const_int 2)))]
-  "!TARGET_64BIT"
-  "movsw"
-  [(set_attr "type" "str")
-   (set_attr "memory" "both")
-   (set_attr "mode" "HI")])
-
-(define_insn "*strmovhi_rex_1"
-  [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
-	(mem:HI (match_operand:DI 3 "register_operand" "1")))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 2)
-		 (const_int 2)))
-   (set (match_operand:DI 1 "register_operand" "=S")
-	(plus:DI (match_dup 3)
-		 (const_int 2)))]
-  "TARGET_64BIT"
+  [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
+	(mem:HI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 2)
+		(const_int 2)))
+   (set (match_operand:P 1 "register_operand" "=S")
+	(plus:P (match_dup 3)
+		(const_int 2)))]
+  ""
   "movsw"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
    (set_attr "mode" "HI")])
 
 (define_insn "*strmovqi_1"
-  [(set (mem:QI (match_operand:SI 2 "register_operand" "0"))
-	(mem:QI (match_operand:SI 3 "register_operand" "1")))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 2)
-		 (const_int 1)))
-   (set (match_operand:SI 1 "register_operand" "=S")
-	(plus:SI (match_dup 3)
-		 (const_int 1)))]
-  "!TARGET_64BIT"
+  [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
+	(mem:QI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 2)
+		(const_int 1)))
+   (set (match_operand:P 1 "register_operand" "=S")
+	(plus:P (match_dup 3)
+		(const_int 1)))]
+  ""
   "movsb"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
-   (set_attr "mode" "QI")])
-
-(define_insn "*strmovqi_rex_1"
-  [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
-	(mem:QI (match_operand:DI 3 "register_operand" "1")))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 2)
-		 (const_int 1)))
-   (set (match_operand:DI 1 "register_operand" "=S")
-	(plus:DI (match_dup 3)
-		 (const_int 1)))]
-  "TARGET_64BIT"
-  "movsb"
-  [(set_attr "type" "str")
-   (set_attr "memory" "both")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "mode" "QI")])
 
 (define_expand "rep_mov"
@@ -17808,87 +15754,51 @@
 	(mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "rep movsq"
+  "rep{%;} movsq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
    (set_attr "mode" "DI")])
 
 (define_insn "*rep_movsi"
-  [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
-   (set (match_operand:SI 0 "register_operand" "=D")
-        (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2")
-			    (const_int 2))
-		 (match_operand:SI 3 "register_operand" "0")))
-   (set (match_operand:SI 1 "register_operand" "=S")
-        (plus:SI (ashift:SI (match_dup 5) (const_int 2))
-		 (match_operand:SI 4 "register_operand" "1")))
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
+			  (const_int 2))
+		 (match_operand:P 3 "register_operand" "0")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (ashift:P (match_dup 5) (const_int 2))
+		(match_operand:P 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 5))]
-  "!TARGET_64BIT"
-  "rep movs{l|d}"
-  [(set_attr "type" "str")
-   (set_attr "prefix_rep" "1")
-   (set_attr "memory" "both")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rep_movsi_rex64"
-  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
-			    (const_int 2))
-		 (match_operand:DI 3 "register_operand" "0")))
-   (set (match_operand:DI 1 "register_operand" "=S")
-        (plus:DI (ashift:DI (match_dup 5) (const_int 2))
-		 (match_operand:DI 4 "register_operand" "1")))
-   (set (mem:BLK (match_dup 3))
-	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))]
-  "TARGET_64BIT"
-  "rep movs{l|d}"
+  ""
+  "rep{%;} movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
    (set_attr "mode" "SI")])
 
 (define_insn "*rep_movqi"
-  [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
-   (set (match_operand:SI 0 "register_operand" "=D")
-        (plus:SI (match_operand:SI 3 "register_operand" "0")
-		 (match_operand:SI 5 "register_operand" "2")))
-   (set (match_operand:SI 1 "register_operand" "=S")
-        (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_operand:P 3 "register_operand" "0")
+		(match_operand:P 5 "register_operand" "2")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 5))]
-  "!TARGET_64BIT"
-  "rep movsb"
+  ""
+  "rep{%;} movsb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rep_movqi_rex64"
-  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (match_operand:DI 3 "register_operand" "0")
-		 (match_operand:DI 5 "register_operand" "2")))
-   (set (match_operand:DI 1 "register_operand" "=S")
-        (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
-   (set (mem:BLK (match_dup 3))
-	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))]
-  "TARGET_64BIT"
-  "rep movsb"
-  [(set_attr "type" "str")
-   (set_attr "prefix_rep" "1")
-   (set_attr "memory" "both")
-   (set_attr "mode" "SI")])
-
-(define_expand "setmemsi"
+   (set_attr "mode" "QI")])
+
+(define_expand "setmem<mode>"
    [(use (match_operand:BLK 0 "memory_operand" ""))
-    (use (match_operand:SI 1 "nonmemory_operand" ""))
+    (use (match_operand:SWI48 1 "nonmemory_operand" ""))
     (use (match_operand 2 "const_int_operand" ""))
     (use (match_operand 3 "const_int_operand" ""))
     (use (match_operand:SI 4 "const_int_operand" ""))
@@ -17903,23 +15813,6 @@
    FAIL;
 })
 
-(define_expand "setmemdi"
-   [(use (match_operand:BLK 0 "memory_operand" ""))
-    (use (match_operand:DI 1 "nonmemory_operand" ""))
-    (use (match_operand 2 "const_int_operand" ""))
-    (use (match_operand 3 "const_int_operand" ""))
-    (use (match_operand 4 "const_int_operand" ""))
-    (use (match_operand 5 "const_int_operand" ""))]
-  "TARGET_64BIT"
-{
- if (ix86_expand_setmem (operands[0], operands[1],
-			 operands[2], operands[3],
-			 operands[4], operands[5]))
-   DONE;
- else
-   FAIL;
-})
-
 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
 
@@ -17968,76 +15861,44 @@
    (set_attr "mode" "DI")])
 
 (define_insn "*strsetsi_1"
-  [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+  [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
 	(match_operand:SI 2 "register_operand" "a"))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 1)
-		 (const_int 4)))]
-  "!TARGET_64BIT"
-  "stos{l|d}"
-  [(set_attr "type" "str")
-   (set_attr "memory" "store")
-   (set_attr "mode" "SI")])
-
-(define_insn "*strsetsi_rex_1"
-  [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
-	(match_operand:SI 2 "register_operand" "a"))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 1)
-		 (const_int 4)))]
-  "TARGET_64BIT"
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 1)
+		(const_int 4)))]
+  ""
   "stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
 (define_insn "*strsethi_1"
-  [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+  [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
 	(match_operand:HI 2 "register_operand" "a"))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 1)
-		 (const_int 2)))]
-  "!TARGET_64BIT"
-  "stosw"
-  [(set_attr "type" "str")
-   (set_attr "memory" "store")
-   (set_attr "mode" "HI")])
-
-(define_insn "*strsethi_rex_1"
-  [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
-	(match_operand:HI 2 "register_operand" "a"))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 1)
-		 (const_int 2)))]
-  "TARGET_64BIT"
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 1)
+		(const_int 2)))]
+  ""
   "stosw"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set_attr "mode" "HI")])
 
 (define_insn "*strsetqi_1"
-  [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+  [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
 	(match_operand:QI 2 "register_operand" "a"))
-   (set (match_operand:SI 0 "register_operand" "=D")
-	(plus:SI (match_dup 1)
-		 (const_int 1)))]
-  "!TARGET_64BIT"
+   (set (match_operand:P 0 "register_operand" "=D")
+	(plus:P (match_dup 1)
+		(const_int 1)))]
+  ""
   "stosb"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
-   (set_attr "mode" "QI")])
-
-(define_insn "*strsetqi_rex_1"
-  [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
-	(match_operand:QI 2 "register_operand" "a"))
-   (set (match_operand:DI 0 "register_operand" "=D")
-	(plus:DI (match_dup 1)
-		 (const_int 1)))]
-  "TARGET_64BIT"
-  "stosb"
-  [(set_attr "type" "str")
-   (set_attr "memory" "store")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "mode" "QI")])
 
 (define_expand "rep_stos"
@@ -18061,77 +15922,48 @@
    (use (match_operand:DI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "rep stosq"
+  "rep{%;} stosq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (set_attr "mode" "DI")])
 
 (define_insn "*rep_stossi"
-  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
-   (set (match_operand:SI 0 "register_operand" "=D")
-        (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1")
-			    (const_int 2))
-		 (match_operand:SI 3 "register_operand" "0")))
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
+			  (const_int 2))
+		 (match_operand:P 3 "register_operand" "0")))
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
-  "!TARGET_64BIT"
-  "rep stos{l|d}"
-  [(set_attr "type" "str")
-   (set_attr "prefix_rep" "1")
-   (set_attr "memory" "store")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rep_stossi_rex64"
-  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
-			    (const_int 2))
-		 (match_operand:DI 3 "register_operand" "0")))
-   (set (mem:BLK (match_dup 3))
-	(const_int 0))
-   (use (match_operand:SI 2 "register_operand" "a"))
-   (use (match_dup 4))]
-  "TARGET_64BIT"
-  "rep stos{l|d}"
+  ""
+  "rep{%;} stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
 (define_insn "*rep_stosqi"
-  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
-   (set (match_operand:SI 0 "register_operand" "=D")
-        (plus:SI (match_operand:SI 3 "register_operand" "0")
-		 (match_operand:SI 4 "register_operand" "1")))
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_operand:P 3 "register_operand" "0")
+		(match_operand:P 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
-  "!TARGET_64BIT"
-  "rep stosb"
+  ""
+  "rep{%;} stosb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rep_stosqi_rex64"
-  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (match_operand:DI 3 "register_operand" "0")
-		 (match_operand:DI 4 "register_operand" "1")))
-   (set (mem:BLK (match_dup 3))
-	(const_int 0))
-   (use (match_operand:QI 2 "register_operand" "a"))
-   (use (match_dup 4))]
-  "TARGET_64BIT"
-  "rep stosb"
-  [(set_attr "type" "str")
-   (set_attr "prefix_rep" "1")
-   (set_attr "memory" "store")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "mode" "QI")])
 
 (define_expand "cmpstrnsi"
@@ -18182,13 +16014,12 @@
     }
   else
     {
-      rtx (*cmp_insn)(rtx, rtx);
-
-      if (TARGET_64BIT)
-	cmp_insn = gen_cmpdi_1;
-      else
-	cmp_insn = gen_cmpsi_1;
-      emit_insn (cmp_insn (countreg, countreg));
+      rtx (*gen_cmp) (rtx, rtx);
+
+      gen_cmp = (TARGET_64BIT
+		 ? gen_cmpdi_1 : gen_cmpsi_1);
+
+      emit_insn (gen_cmp (countreg, countreg));
       emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
 				  operands[1], operands[2]));
     }
@@ -18215,8 +16046,10 @@
 			     (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
-  "operands[1] = gen_reg_rtx (QImode);
-   operands[2] = gen_reg_rtx (QImode);")
+{
+  operands[1] = gen_reg_rtx (QImode);
+  operands[2] = gen_reg_rtx (QImode);
+})
 
 ;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
 ;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
@@ -18235,33 +16068,22 @@
 
 (define_insn "*cmpstrnqi_nz_1"
   [(set (reg:CC FLAGS_REG)
-	(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
-		    (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
-   (use (match_operand:SI 6 "register_operand" "2"))
+	(compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+		    (mem:BLK (match_operand:P 5 "register_operand" "1"))))
+   (use (match_operand:P 6 "register_operand" "2"))
    (use (match_operand:SI 3 "immediate_operand" "i"))
-   (clobber (match_operand:SI 0 "register_operand" "=S"))
-   (clobber (match_operand:SI 1 "register_operand" "=D"))
-   (clobber (match_operand:SI 2 "register_operand" "=c"))]
-  "!TARGET_64BIT"
-  "repz cmpsb"
+   (clobber (match_operand:P 0 "register_operand" "=S"))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (match_operand:P 2 "register_operand" "=c"))]
+  ""
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
-   (set_attr "prefix_rep" "1")])
-
-(define_insn "*cmpstrnqi_nz_rex_1"
-  [(set (reg:CC FLAGS_REG)
-	(compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
-		    (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
-   (use (match_operand:DI 6 "register_operand" "2"))
-   (use (match_operand:SI 3 "immediate_operand" "i"))
-   (clobber (match_operand:DI 0 "register_operand" "=S"))
-   (clobber (match_operand:DI 1 "register_operand" "=D"))
-   (clobber (match_operand:DI 2 "register_operand" "=c"))]
-  "TARGET_64BIT"
-  "repz cmpsb"
-  [(set_attr "type" "str")
-   (set_attr "mode" "QI")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "prefix_rep" "1")])
 
 ;; The same, but the count is not known to not be zero.
@@ -18283,59 +16105,33 @@
 
 (define_insn "*cmpstrnqi_1"
   [(set (reg:CC FLAGS_REG)
-	(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
+	(if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
 			     (const_int 0))
-	  (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
-		      (mem:BLK (match_operand:SI 5 "register_operand" "1")))
-	  (const_int 0)))
-   (use (match_operand:SI 3 "immediate_operand" "i"))
-   (use (reg:CC FLAGS_REG))
-   (clobber (match_operand:SI 0 "register_operand" "=S"))
-   (clobber (match_operand:SI 1 "register_operand" "=D"))
-   (clobber (match_operand:SI 2 "register_operand" "=c"))]
-  "!TARGET_64BIT"
-  "repz cmpsb"
-  [(set_attr "type" "str")
-   (set_attr "mode" "QI")
-   (set_attr "prefix_rep" "1")])
-
-(define_insn "*cmpstrnqi_rex_1"
-  [(set (reg:CC FLAGS_REG)
-	(if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
-			     (const_int 0))
-	  (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
-		      (mem:BLK (match_operand:DI 5 "register_operand" "1")))
+	  (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+		      (mem:BLK (match_operand:P 5 "register_operand" "1")))
 	  (const_int 0)))
    (use (match_operand:SI 3 "immediate_operand" "i"))
    (use (reg:CC FLAGS_REG))
-   (clobber (match_operand:DI 0 "register_operand" "=S"))
-   (clobber (match_operand:DI 1 "register_operand" "=D"))
-   (clobber (match_operand:DI 2 "register_operand" "=c"))]
-  "TARGET_64BIT"
-  "repz cmpsb"
+   (clobber (match_operand:P 0 "register_operand" "=S"))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (match_operand:P 2 "register_operand" "=c"))]
+  ""
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "prefix_rep" "1")])
 
-(define_expand "strlensi"
-  [(set (match_operand:SI 0 "register_operand" "")
-	(unspec:SI [(match_operand:BLK 1 "general_operand" "")
-		    (match_operand:QI 2 "immediate_operand" "")
-		    (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
-  ""
-{
- if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
-   DONE;
- else
-   FAIL;
-})
-
-(define_expand "strlendi"
-  [(set (match_operand:DI 0 "register_operand" "")
-	(unspec:DI [(match_operand:BLK 1 "general_operand" "")
-		    (match_operand:QI 2 "immediate_operand" "")
-		    (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+(define_expand "strlen<mode>"
+  [(set (match_operand:SWI48x 0 "register_operand" "")
+	(unspec:SWI48x [(match_operand:BLK 1 "general_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")
+			(match_operand 3 "immediate_operand" "")]
+		       UNSPEC_SCAS))]
   ""
 {
  if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
@@ -18345,39 +16141,30 @@
 })
 
 (define_expand "strlenqi_1"
-  [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
+  [(parallel [(set (match_operand 0 "register_operand" "")
+		   (match_operand 2 "" ""))
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
   "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strlenqi_1"
-  [(set (match_operand:SI 0 "register_operand" "=&c")
-	(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
-		    (match_operand:QI 2 "register_operand" "a")
-		    (match_operand:SI 3 "immediate_operand" "i")
-		    (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
-   (clobber (match_operand:SI 1 "register_operand" "=D"))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT"
-  "repnz scasb"
+  [(set (match_operand:P 0 "register_operand" "=&c")
+	(unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
+		   (match_operand:QI 2 "register_operand" "a")
+		   (match_operand:P 3 "immediate_operand" "i")
+		   (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "repnz{%;} scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
-   (set_attr "prefix_rep" "1")])
-
-(define_insn "*strlenqi_rex_1"
-  [(set (match_operand:DI 0 "register_operand" "=&c")
-	(unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
-		    (match_operand:QI 2 "register_operand" "a")
-		    (match_operand:DI 3 "immediate_operand" "i")
-		    (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
-   (clobber (match_operand:DI 1 "register_operand" "=D"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "repnz scasb"
-  [(set_attr "type" "str")
-   (set_attr "mode" "QI")
-   (set_attr "prefix_rex" "0")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+	  (const_string "0")
+	  (const_string "*")))
    (set_attr "prefix_rep" "1")])
 
 ;; Peephole optimizations to clean up after cmpstrn*.  This should be
@@ -18420,8 +16207,7 @@
      (use (match_dup 3))
      (clobber (match_dup 0))
      (clobber (match_dup 1))
-     (clobber (match_dup 2))])]
-  "")
+     (clobber (match_dup 2))])])
 
 ;; ...and this one handles cmpstrn*_1.
 (define_peephole2
@@ -18456,16 +16242,13 @@
      (use (reg:CC FLAGS_REG))
      (clobber (match_dup 0))
      (clobber (match_dup 1))
-     (clobber (match_dup 2))])]
-  "")
-
-
+     (clobber (match_dup 2))])])
 
 ;; Conditional move instructions.
 
 (define_expand "mov<mode>cc"
   [(set (match_operand:SWIM 0 "register_operand" "")
-	(if_then_else:SWIM (match_operand 1 "comparison_operator" "")
+	(if_then_else:SWIM (match_operand 1 "ordered_comparison_operator" "")
 			   (match_operand:SWIM 2 "general_operand" "")
 			   (match_operand:SWIM 3 "general_operand" "")))]
   ""
@@ -18484,9 +16267,7 @@
 	      (const_int 0)])
 	    (const_int -1)
 	    (const_int 0)))
-     (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
+     (clobber (reg:CC FLAGS_REG))])])
 
 (define_insn "*x86_mov<mode>cc_0_m1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
@@ -18581,13 +16362,26 @@
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
 
-(define_insn "*movsfcc_1_387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
-	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+(define_insn "*movxfcc_1"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
 				[(reg FLAGS_REG) (const_int 0)])
-		      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
-		      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
-  "TARGET_80387 && TARGET_CMOVE
+		      (match_operand:XF 2 "register_operand" "f,0")
+		      (match_operand:XF 3 "register_operand" "0,f")))]
+  "TARGET_80387 && TARGET_CMOVE"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov")
+   (set_attr "mode" "XF")])
+
+(define_insn "*movdfcc_1_rex64"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
+	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+		      (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
    && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
@@ -18595,7 +16389,7 @@
    cmov%O2%C1\t{%2, %0|%0, %2}
    cmov%O2%c1\t{%3, %0|%0, %3}"
   [(set_attr "type" "fcmov,fcmov,icmov,icmov")
-   (set_attr "mode" "SF,SF,SI,SI")])
+   (set_attr "mode" "DF,DF,DI,DI")])
 
 (define_insn "*movdfcc_1"
   [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
@@ -18611,23 +16405,7 @@
    #
    #"
   [(set_attr "type" "fcmov,fcmov,multi,multi")
-   (set_attr "mode" "DF")])
-
-(define_insn "*movdfcc_1_rex64"
-  [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
-	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
-				[(reg FLAGS_REG) (const_int 0)])
-		      (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
-		      (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
-  "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
-   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
-  "@
-   fcmov%F1\t{%2, %0|%0, %2}
-   fcmov%f1\t{%3, %0|%0, %3}
-   cmov%O2%C1\t{%2, %0|%0, %2}
-   cmov%O2%c1\t{%3, %0|%0, %3}"
-  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "DF,DF,DI,DI")])
 
 (define_split
   [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
@@ -18645,21 +16423,26 @@
 	(if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
 		      (match_dup 7)
 		      (match_dup 8)))]
-  "split_di (&operands[2], 2, &operands[5], &operands[7]);
-   split_di (&operands[0], 1, &operands[2], &operands[3]);")
-
-(define_insn "*movxfcc_1"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-	(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+{
+  split_double_mode (DImode, &operands[2], 2, &operands[5], &operands[7]);
+  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
+})
+
+(define_insn "*movsfcc_1_387"
+  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
 				[(reg FLAGS_REG) (const_int 0)])
-		      (match_operand:XF 2 "register_operand" "f,0")
-		      (match_operand:XF 3 "register_operand" "0,f")))]
-  "TARGET_80387 && TARGET_CMOVE"
+		      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+		      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
-   fcmov%f1\t{%3, %0|%0, %3}"
-  [(set_attr "type" "fcmov")
-   (set_attr "mode" "XF")])
+   fcmov%f1\t{%3, %0|%0, %3}
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+   (set_attr "mode" "SF,SF,SI,SI")])
 
 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
 ;; the scalar versions to have only XMM registers as operands.
@@ -18686,7 +16469,7 @@
 	  (match_operand:MODEF 1 "nonimmediate_operand" "%x")
 	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
   "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "v<maxmin_float>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
@@ -18697,7 +16480,7 @@
 	  (match_operand:MODEF 1 "nonimmediate_operand" "%0")
 	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "<maxminfprefix>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
+  "<maxmin_float>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
@@ -18773,21 +16556,22 @@
   ;; The % modifier is not operational anymore in peephole2's, so we have to
   ;; swap the operands manually in the case of addition and multiplication.
   "if (COMMUTATIVE_ARITH_P (operands[2]))
-     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
-				 operands[0], operands[1]);
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]),
+				   GET_MODE (operands[2]),
+				   operands[0], operands[1]);
    else
-     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
-				 operands[1], operands[0]);")
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]),
+				   GET_MODE (operands[2]),
+				   operands[1], operands[0]);")
 
 ;; Conditional addition patterns
 (define_expand "add<mode>cc"
   [(match_operand:SWI 0 "register_operand" "")
-   (match_operand 1 "comparison_operator" "")
+   (match_operand 1 "ordered_comparison_operator" "")
    (match_operand:SWI 2 "register_operand" "")
    (match_operand:SWI 3 "const_int_operand" "")]
   ""
   "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
-
 
 ;; Misc patterns (?)
 
@@ -18800,43 +16584,37 @@
 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
 ;;
 ;; in proper program order.
-(define_insn "pro_epilogue_adjust_stack_1"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(plus:SI (match_operand:SI 1 "register_operand" "0,r")
-	         (match_operand:SI 2 "immediate_operand" "i,i")))
+
+(define_insn "pro_epilogue_adjust_stack_<mode>_add"
+  [(set (match_operand:P 0 "register_operand" "=r,r")
+	(plus:P (match_operand:P 1 "register_operand" "0,r")
+	        (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
    (clobber (reg:CC FLAGS_REG))
    (clobber (mem:BLK (scratch)))]
-  "!TARGET_64BIT"
+  ""
 {
   switch (get_attr_type (insn))
     {
     case TYPE_IMOV:
-      return "mov{l}\t{%1, %0|%0, %1}";
+      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
 
     case TYPE_ALU:
-      if (CONST_INT_P (operands[2])
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-	          && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{l}\t{%2, %0|%0, %2}";
-	}
-      return "add{l}\t{%2, %0|%0, %2}";
-
-    case TYPE_LEA:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+	return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+    default:
       operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
-      return "lea{l}\t{%a2, %0|%0, %a2}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set (attr "type")
-	(cond [(and (eq_attr "alternative" "0") 
-	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
+      return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
+    }
+}
+  [(set (attr "type")
+	(cond [(and (eq_attr "alternative" "0")
+		    (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
 		 (const_string "alu")
-	       (match_operand:SI 2 "const0_operand" "")
+	       (match_operand:<MODE> 2 "const0_operand" "")
 		 (const_string "imov")
 	      ]
 	      (const_string "lea")))
@@ -18848,113 +16626,33 @@
 		 (const_string "1")
 	      ]
 	      (const_string "*")))
-   (set_attr "mode" "SI")])
-
-(define_insn "pro_epilogue_adjust_stack_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(plus:DI (match_operand:DI 1 "register_operand" "0,r")
-		 (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "pro_epilogue_adjust_stack_<mode>_sub"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(minus:P (match_operand:P 1 "register_operand" "0")
+		 (match_operand:P 2 "register_operand" "r")))
    (clobber (reg:CC FLAGS_REG))
    (clobber (mem:BLK (scratch)))]
-  "TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_IMOV:
-      return "mov{q}\t{%1, %0|%0, %1}";
-
-    case TYPE_ALU:
-      if (CONST_INT_P (operands[2])
-	  /* Avoid overflows.  */
-	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
-          && (INTVAL (operands[2]) == 128
-	      || (INTVAL (operands[2]) < 0
-	          && INTVAL (operands[2]) != -128)))
-	{
-	  operands[2] = GEN_INT (-INTVAL (operands[2]));
-	  return "sub{q}\t{%2, %0|%0, %2}";
-	}
-      return "add{q}\t{%2, %0|%0, %2}";
-
-    case TYPE_LEA:
-      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
-      return "lea{q}\t{%a2, %0|%0, %a2}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set (attr "type")
-	(cond [(and (eq_attr "alternative" "0")
-	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
-		 (const_string "alu")
-	       (match_operand:DI 2 "const0_operand" "")
-		 (const_string "imov")
-	      ]
-	      (const_string "lea")))
-   (set (attr "length_immediate")
-	(cond [(eq_attr "type" "imov")
-		 (const_string "0")
-	       (and (eq_attr "type" "alu")
-		    (match_operand 2 "const128_operand" ""))
-		 (const_string "1")
-	      ]
-	      (const_string "*")))
-   (set_attr "mode" "DI")])
-
-(define_insn "pro_epilogue_adjust_stack_rex64_2"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(plus:DI (match_operand:DI 1 "register_operand" "0,r")
-		 (match_operand:DI 3 "immediate_operand" "i,i")))
-   (use (match_operand:DI 2 "register_operand" "r,r"))
-   (clobber (reg:CC FLAGS_REG))
-   (clobber (mem:BLK (scratch)))]
-  "TARGET_64BIT"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_ALU:
-      return "add{q}\t{%2, %0|%0, %2}";
-
-    case TYPE_LEA:
-      operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]);
-      return "lea{q}\t{%a2, %0|%0, %a2}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "alu,lea")
-   (set_attr "mode" "DI")])
-
-(define_insn "allocate_stack_worker_32"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")]
+  ""
+  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "allocate_stack_worker_probe_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=a")
+	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
 			    UNSPECV_STACK_PROBE))
-   (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1)))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_STACK_PROBE"
-  "call\t___chkstk"
-  [(set_attr "type" "multi")
-   (set_attr "length" "5")])
-
-(define_insn "allocate_stack_worker_64"
-  [(set (match_operand:DI 0 "register_operand" "=a")
-	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")]
-			    UNSPECV_STACK_PROBE))
-   (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1)))
-   (clobber (reg:DI R10_REG))
-   (clobber (reg:DI R11_REG))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_STACK_PROBE"
-  "call\t___chkstk"
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_target_stack_probe ()"
+  "call\t___chkstk_ms"
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
 (define_expand "allocate_stack"
   [(match_operand 0 "register_operand" "")
    (match_operand 1 "general_operand" "")]
-  "TARGET_STACK_PROBE"
+  "ix86_target_stack_probe ()"
 {
   rtx x;
 
@@ -18974,10 +16672,13 @@
     {
       x = copy_to_mode_reg (Pmode, operands[1]);
       if (TARGET_64BIT)
-	x = gen_allocate_stack_worker_64 (x, x);
-      else
-	x = gen_allocate_stack_worker_32 (x, x);
-      emit_insn (x);
+        emit_insn (gen_allocate_stack_worker_probe_di (x, x));
+      else
+        emit_insn (gen_allocate_stack_worker_probe_si (x, x));
+      x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
+			       stack_pointer_rtx, 0, OPTAB_DIRECT);
+      if (x != stack_pointer_rtx)
+	emit_move_insn (stack_pointer_rtx, x);
     }
 
   emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
@@ -18989,12 +16690,36 @@
   [(match_operand 0 "memory_operand" "")]
   ""
 {
-  if (GET_MODE (operands[0]) == DImode)
-    emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx));
-  else
-    emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx));
-  DONE;
-})
+  rtx (*gen_ior3) (rtx, rtx, rtx);
+
+  gen_ior3 = (GET_MODE (operands[0]) == DImode
+	      ? gen_iordi3 : gen_iorsi3);
+
+  emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx));
+  DONE;
+})
+
+(define_insn "adjust_stack_and_probe<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+			    UNSPECV_PROBE_STACK_RANGE))
+   (set (reg:P SP_REG)
+        (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "* return output_adjust_stack_and_probe (operands[0]);"
+  [(set_attr "type" "multi")])
+
+(define_insn "probe_stack_range<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+			    (match_operand:P 2 "const_int_operand" "n")]
+			    UNSPECV_PROBE_STACK_RANGE))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "* return output_probe_stack_range (operands[0], operands[2]);"
+  [(set_attr "type" "multi")])
 
 (define_expand "builtin_setjmp_receiver"
   [(label_ref (match_operand 0 "" ""))]
@@ -19128,7 +16853,7 @@
 
 (define_split
   [(set (match_operand 0 "register_operand" "")
-	(if_then_else (match_operator 1 "comparison_operator"
+	(if_then_else (match_operator 1 "ordered_comparison_operator"
 				[(reg FLAGS_REG) (const_int 0)])
 		      (match_operand 2 "register_operand" "")
 		      (match_operand 3 "register_operand" "")))]
@@ -19142,33 +16867,21 @@
   "operands[0] = gen_lowpart (SImode, operands[0]);
    operands[2] = gen_lowpart (SImode, operands[2]);
    operands[3] = gen_lowpart (SImode, operands[3]);")
-
 
 ;; RTL Peephole optimizations, run before sched2.  These primarily look to
 ;; transform a complex memory operation into two memory to register operations.
 
 ;; Don't push memory operands
 (define_peephole2
-  [(set (match_operand:SI 0 "push_operand" "")
-	(match_operand:SI 1 "memory_operand" ""))
-   (match_scratch:SI 2 "r")]
+  [(set (match_operand:SWI 0 "push_operand" "")
+	(match_operand:SWI 1 "memory_operand" ""))
+   (match_scratch:SWI 2 "<r>")]
   "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_peephole2
-  [(set (match_operand:DI 0 "push_operand" "")
-	(match_operand:DI 1 "memory_operand" ""))
-   (match_scratch:DI 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
-   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-;; We need to handle SFmode only, because DFmode and XFmode is split to
+   (set (match_dup 0) (match_dup 2))])
+
+;; We need to handle SFmode only, because DFmode and XFmode are split to
 ;; SImode pushes.
 (define_peephole2
   [(set (match_operand:SF 0 "push_operand" "")
@@ -19177,51 +16890,16 @@
   "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_peephole2
-  [(set (match_operand:HI 0 "push_operand" "")
-	(match_operand:HI 1 "memory_operand" ""))
-   (match_scratch:HI 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
-   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_peephole2
-  [(set (match_operand:QI 0 "push_operand" "")
-	(match_operand:QI 1 "memory_operand" ""))
-   (match_scratch:QI 2 "q")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
-   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 ;; Don't move an immediate directly to memory when the instruction
 ;; gets too big.
 (define_peephole2
-  [(match_scratch:SI 1 "r")
-   (set (match_operand:SI 0 "memory_operand" "")
+  [(match_scratch:SWI124 1 "<r>")
+   (set (match_operand:SWI124 0 "memory_operand" "")
         (const_int 0))]
   "optimize_insn_for_speed_p ()
-   && ! TARGET_USE_MOV0
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
-   && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 1) (const_int 0))
-	      (clobber (reg:CC FLAGS_REG))])
-   (set (match_dup 0) (match_dup 1))]
-  "")
-
-(define_peephole2
-  [(match_scratch:HI 1 "r")
-   (set (match_operand:HI 0 "memory_operand" "")
-        (const_int 0))]
-  "optimize_insn_for_speed_p ()
-   && ! TARGET_USE_MOV0
+   && !TARGET_USE_MOV0
    && TARGET_SPLIT_LONG_MOVES
    && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
    && peep2_regno_dead_p (0, FLAGS_REG)"
@@ -19231,51 +16909,14 @@
   "operands[2] = gen_lowpart (SImode, operands[1]);")
 
 (define_peephole2
-  [(match_scratch:QI 1 "q")
-   (set (match_operand:QI 0 "memory_operand" "")
-        (const_int 0))]
-  "optimize_insn_for_speed_p ()
-   && ! TARGET_USE_MOV0
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
-   && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 2) (const_int 0))
-	      (clobber (reg:CC FLAGS_REG))])
-   (set (match_dup 0) (match_dup 1))]
-  "operands[2] = gen_lowpart (SImode, operands[1]);")
-
-(define_peephole2
-  [(match_scratch:SI 2 "r")
-   (set (match_operand:SI 0 "memory_operand" "")
-        (match_operand:SI 1 "immediate_operand" ""))]
+  [(match_scratch:SWI124 2 "<r>")
+   (set (match_operand:SWI124 0 "memory_operand" "")
+        (match_operand:SWI124 1 "immediate_operand" ""))]
   "optimize_insn_for_speed_p ()
    && TARGET_SPLIT_LONG_MOVES
    && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_peephole2
-  [(match_scratch:HI 2 "r")
-   (set (match_operand:HI 0 "memory_operand" "")
-        (match_operand:HI 1 "immediate_operand" ""))]
-  "optimize_insn_for_speed_p ()
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_peephole2
-  [(match_scratch:QI 2 "q")
-   (set (match_operand:QI 0 "memory_operand" "")
-        (match_operand:QI 1 "immediate_operand" ""))]
-  "optimize_insn_for_speed_p ()
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 ;; Don't compare memory with zero, load and use a test instead.
 (define_peephole2
@@ -19286,8 +16927,7 @@
    (match_scratch:SI 3 "r")]
   "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
   [(set (match_dup 3) (match_dup 2))
-   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
-  "")
+   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
 
 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
 ;; Don't split NOTs with a displacement operand, because resulting XOR
@@ -19301,46 +16941,18 @@
 ;; lifetime information then.
 
 (define_peephole2
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	(not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "optimize_insn_for_speed_p ()
-   && ((TARGET_NOT_UNPAIRABLE
-        && (!MEM_P (operands[0])
-            || !memory_displacement_operand (operands[0], SImode)))
-       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))
-   && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0)
-		   (xor:SI (match_dup 1) (const_int -1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_peephole2
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-	(not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  [(set (match_operand:SWI124 0 "nonimmediate_operand" "")
+	(not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand" "")))]
   "optimize_insn_for_speed_p ()
    && ((TARGET_NOT_UNPAIRABLE
-        && (!MEM_P (operands[0])
-            || !memory_displacement_operand (operands[0], HImode)))
-       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))
+	&& (!MEM_P (operands[0])
+	    || !memory_displacement_operand (operands[0], <MODE>mode)))
+       || (TARGET_NOT_VECTORMODE
+	   && long_memory_operand (operands[0], <MODE>mode)))
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
-		   (xor:HI (match_dup 1) (const_int -1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_peephole2
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-	(not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
-  "optimize_insn_for_speed_p ()
-   && ((TARGET_NOT_UNPAIRABLE
-        && (!MEM_P (operands[0])
-            || !memory_displacement_operand (operands[0], QImode)))
-       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))
-   && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0)
-		   (xor:QI (match_dup 1) (const_int -1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
+		   (xor:SWI124 (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])])
 
 ;; Non pairable "test imm, reg" instructions can be translated to
 ;; "and imm, reg" if reg dies.  The "and" form is also shorter (one
@@ -19364,8 +16976,7 @@
 	   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
 		            (const_int 0)]))
       (set (match_dup 2)
-	   (and:SI (match_dup 2) (match_dup 3)))])]
-  "")
+	   (and:SI (match_dup 2) (match_dup 3)))])])
 
 ;; We don't need to handle HImode case, because it will be promoted to SImode
 ;; on ! TARGET_PARTIAL_REG_STALL
@@ -19385,8 +16996,7 @@
 	   (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
 		            (const_int 0)]))
       (set (match_dup 2)
-	   (and:QI (match_dup 2) (match_dup 3)))])]
-  "")
+	   (and:QI (match_dup 2) (match_dup 3)))])])
 
 (define_peephole2
   [(set (match_operand 0 "flags_reg_operand" "")
@@ -19419,8 +17029,7 @@
 		       (match_dup 2)
 		       (const_int 8)
 		       (const_int 8))
-		     (match_dup 3)))])]
-  "")
+		     (match_dup 3)))])])
 
 ;; Don't do logical operations with memory inputs.
 (define_peephole2
@@ -19434,8 +17043,7 @@
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
-              (clobber (reg:CC FLAGS_REG))])]
-  "")
+              (clobber (reg:CC FLAGS_REG))])])
 
 (define_peephole2
   [(match_scratch:SI 2 "r")
@@ -19448,8 +17056,7 @@
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
-              (clobber (reg:CC FLAGS_REG))])]
-  "")
+              (clobber (reg:CC FLAGS_REG))])])
 
 ;; Prefer Load+RegOp to Mov+MemOp.  Watch out for cases when the memory address
 ;; refers to the destination of the load!
@@ -19483,8 +17090,7 @@
        || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))"
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0)
-        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]
-  "")
+        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
 
 ; Don't do logical operations with memory outputs
 ;
@@ -19506,8 +17112,7 @@
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
               (clobber (reg:CC FLAGS_REG))])
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 (define_peephole2
   [(match_scratch:SI 2 "r")
@@ -19523,8 +17128,7 @@
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
               (clobber (reg:CC FLAGS_REG))])
-   (set (match_dup 0) (match_dup 2))]
-  "")
+   (set (match_dup 0) (match_dup 2))])
 
 ;; Attempt to always use XOR for zeroing registers.
 (define_peephole2
@@ -19536,9 +17140,7 @@
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int 0))
 	      (clobber (reg:CC FLAGS_REG))])]
-{
-  operands[0] = gen_lowpart (word_mode, operands[0]);
-})
+  "operands[0] = gen_lowpart (word_mode, operands[0]);")
 
 (define_peephole2
   [(set (strict_low_part (match_operand 0 "register_operand" ""))
@@ -19550,66 +17152,48 @@
   [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
 	      (clobber (reg:CC FLAGS_REG))])])
 
-;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg.
-(define_peephole2
-  [(set (match_operand 0 "register_operand" "")
+;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
+(define_peephole2
+  [(set (match_operand:SWI248 0 "register_operand" "")
 	(const_int -1))]
-  "(GET_MODE (operands[0]) == HImode
-    || GET_MODE (operands[0]) == SImode
-    || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
-   && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+  "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int -1))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
-			      operands[0]);")
-
-;; Attempt to convert simple leas to adds. These can be created by
-;; move expanders.
-(define_peephole2
-  [(set (match_operand:SI 0 "register_operand" "")
-  	(plus:SI (match_dup 0)
-		 (match_operand:SI 1 "nonmemory_operand" "")))]
+{
+  if (GET_MODE_SIZE (<MODE>mode) < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+;; Attempt to convert simple lea to add/shift.
+;; These can be created by move expanders.
+
+(define_peephole2
+  [(set (match_operand:SWI48 0 "register_operand" "")
+  	(plus:SWI48 (match_dup 0)
+		    (match_operand:SWI48 1 "<nonmemory_operand>" "")))]
   "peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
+  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])])
 
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand" "")
   	(subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
 			    (match_operand:DI 2 "nonmemory_operand" "")) 0))]
-  "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+  "TARGET_64BIT
+   && peep2_regno_dead_p (0, FLAGS_REG)
+   && REGNO (operands[0]) == REGNO (operands[1])"
   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_peephole2
-  [(set (match_operand:DI 0 "register_operand" "")
-  	(plus:DI (match_dup 0)
-		 (match_operand:DI 1 "x86_64_general_operand" "")))]
-  "peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "")
-
-(define_peephole2
-  [(set (match_operand:SI 0 "register_operand" "")
-  	(mult:SI (match_dup 0)
-		 (match_operand:SI 1 "const_int_operand" "")))]
+  [(set (match_operand:SWI48 0 "register_operand" "")
+  	(mult:SWI48 (match_dup 0)
+		    (match_operand:SWI48 1 "const_int_operand" "")))]
   "exact_log2 (INTVAL (operands[1])) >= 0
    && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
-
-(define_peephole2
-  [(set (match_operand:DI 0 "register_operand" "")
-  	(mult:DI (match_dup 0)
-		 (match_operand:DI 1 "const_int_operand" "")))]
-  "exact_log2 (INTVAL (operands[1])) >= 0
-   && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 
@@ -19617,7 +17201,8 @@
   [(set (match_operand:SI 0 "register_operand" "")
   	(subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
 		   (match_operand:DI 2 "const_int_operand" "")) 0))]
-  "exact_log2 (INTVAL (operands[2])) >= 0
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[2])) >= 0
    && REGNO (operands[0]) == REGNO (operands[1])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
@@ -19625,12 +17210,13 @@
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
 
 ;; The ESP adjustments can be done by the push and pop instructions.  Resulting
-;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes.  On
-;; many CPUs it is also faster, since special hardware to avoid esp
+;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
+;; On many CPUs it is also faster, since special hardware to avoid esp
 ;; dependencies is present.
 
-;; While some of these conversions may be done using splitters, we use peepholes
-;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL.
+;; While some of these conversions may be done using splitters, we use
+;; peepholes in order to allow combine_stack_adjustments pass to see
+;; nonobfuscated RTL.
 
 ;; Convert prologue esp subtractions to push.
 ;; We need register to push.  In order to keep verify_flow_info happy we have
@@ -19644,119 +17230,131 @@
 ;; alternative when no register is available later.
 
 (define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))
 	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
-  [(clobber (match_dup 0))
-   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+  [(clobber (match_dup 1))
+   (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
 	      (clobber (mem:BLK (scratch)))])])
 
 (define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))
 	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
-  [(clobber (match_dup 0))
-   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
-   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+  [(clobber (match_dup 1))
+   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
 	      (clobber (mem:BLK (scratch)))])])
 
 ;; Convert esp subtractions to push.
 (define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
-  [(clobber (match_dup 0))
-   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
-
-(define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
-  [(clobber (match_dup 0))
-   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
-   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+  [(clobber (match_dup 1))
+   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+
+(define_peephole2
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+  [(clobber (match_dup 1))
+   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
 
 ;; Convert epilogue deallocator to pop.
 (define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))
 	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
-	      (clobber (mem:BLK (scratch)))])]
-  "")
-
-;; Two pops case is tricky, since pop causes dependency on destination register.
-;; We use two registers if available.
-(define_peephole2
-  [(match_scratch:SI 0 "r")
-   (match_scratch:SI 1 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+  "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
+  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+	      (clobber (mem:BLK (scratch)))])])
+
+;; Two pops case is tricky, since pop causes dependency
+;; on destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:P 1 "r")
+   (match_scratch:P 2 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))
 	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+  "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
 	      (clobber (mem:BLK (scratch)))])
-   (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
-  "")
-
-(define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+   (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+(define_peephole2
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))
 	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p ()"
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+  "optimize_insn_for_size_p ()
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
 	      (clobber (mem:BLK (scratch)))])
-   (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
-  "")
+   (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
 
 ;; Convert esp additions to pop.
 (define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
-  "")
-
-;; Two pops case is tricky, since pop causes dependency on destination register.
-;; We use two registers if available.
-(define_peephole2
-  [(match_scratch:SI 0 "r")
-   (match_scratch:SI 1 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
-   (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
-  "")
-
-(define_peephole2
-  [(match_scratch:SI 0 "r")
-   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p ()"
-  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
-   (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
-	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
-  "")
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
+  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+;; Two pops case is tricky, since pop causes dependency
+;; on destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:P 1 "r")
+   (match_scratch:P 2 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+(define_peephole2
+  [(match_scratch:P 1 "r")
+   (parallel [(set (reg:P SP_REG)
+		   (plus:P (reg:P SP_REG)
+			   (match_operand:P 0 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p ()
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
 
 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
 ;; required and register dies.  Similarly for 128 to -128.
@@ -19773,144 +17371,28 @@
    && peep2_reg_dead_p (1, operands[2])"
   [(parallel [(set (match_dup 0)
 		   (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
-	      (clobber (match_dup 2))])]
-  "")
-
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
-  [(clobber (match_dup 0))
-   (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
-	      (clobber (mem:BLK (scratch)))])])
-
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
-  [(clobber (match_dup 0))
-   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
-   (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
-	      (clobber (mem:BLK (scratch)))])])
-
-;; Convert esp subtractions to push.
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
-  [(clobber (match_dup 0))
-   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
-
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
-  [(clobber (match_dup 0))
-   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
-   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
-
-;; Convert epilogue deallocator to pop.
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
-	      (clobber (mem:BLK (scratch)))])]
-  "")
-
-;; Two pops case is tricky, since pop causes dependency on destination register.
-;; We use two registers if available.
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (match_scratch:DI 1 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
-	      (clobber (mem:BLK (scratch)))])
-   (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
-  "")
-
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (mem:BLK (scratch)))])]
-  "optimize_insn_for_size_p ()"
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
-	      (clobber (mem:BLK (scratch)))])
-   (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
-  "")
-
-;; Convert esp additions to pop.
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
-  "")
-
-;; Two pops case is tricky, since pop causes dependency on destination register.
-;; We use two registers if available.
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (match_scratch:DI 1 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
-   (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
-  "")
-
-(define_peephole2
-  [(match_scratch:DI 0 "r")
-   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_size_p ()"
-  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
-   (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
-	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
-  "")
+	      (clobber (match_dup 2))])])
 
 ;; Convert imul by three, five and nine into lea
 (define_peephole2
   [(parallel
-    [(set (match_operand:SI 0 "register_operand" "")
-	  (mult:SI (match_operand:SI 1 "register_operand" "")
-		   (match_operand:SI 2 "const_int_operand" "")))
+    [(set (match_operand:SWI48 0 "register_operand" "")
+	  (mult:SWI48 (match_operand:SWI48 1 "register_operand" "")
+		      (match_operand:SWI48 2 "const_int_operand" "")))
      (clobber (reg:CC FLAGS_REG))])]
   "INTVAL (operands[2]) == 3
    || INTVAL (operands[2]) == 5
    || INTVAL (operands[2]) == 9"
   [(set (match_dup 0)
-        (plus:SI (mult:SI (match_dup 1) (match_dup 2))
-                 (match_dup 1)))]
-  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+	(plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
+		    (match_dup 1)))]
+  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
 
 (define_peephole2
   [(parallel
-    [(set (match_operand:SI 0 "register_operand" "")
-          (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                   (match_operand:SI 2 "const_int_operand" "")))
+    [(set (match_operand:SWI48 0 "register_operand" "")
+	  (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:SWI48 2 "const_int_operand" "")))
      (clobber (reg:CC FLAGS_REG))])]
   "optimize_insn_for_speed_p ()
    && (INTVAL (operands[2]) == 3
@@ -19918,69 +17400,23 @@
        || INTVAL (operands[2]) == 9)"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 0)
-        (plus:SI (mult:SI (match_dup 0) (match_dup 2))
-                 (match_dup 0)))]
-  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
-
-(define_peephole2
-  [(parallel
-    [(set (match_operand:DI 0 "register_operand" "")
-	  (mult:DI (match_operand:DI 1 "register_operand" "")
-		   (match_operand:DI 2 "const_int_operand" "")))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT
-   && (INTVAL (operands[2]) == 3
-       || INTVAL (operands[2]) == 5
-       || INTVAL (operands[2]) == 9)"
-  [(set (match_dup 0)
-        (plus:DI (mult:DI (match_dup 1) (match_dup 2))
-                 (match_dup 1)))]
-  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
-
-(define_peephole2
-  [(parallel
-    [(set (match_operand:DI 0 "register_operand" "")
-          (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
-                   (match_operand:DI 2 "const_int_operand" "")))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT
-   && optimize_insn_for_speed_p ()
-   && (INTVAL (operands[2]) == 3
-       || INTVAL (operands[2]) == 5
-       || INTVAL (operands[2]) == 9)"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 0)
-        (plus:DI (mult:DI (match_dup 0) (match_dup 2))
-                 (match_dup 0)))]
-  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
-
-;; Imul $32bit_imm, mem, reg is vector decoded, while
+	(plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
+		    (match_dup 0)))]
+  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
+
+;; imul $32bit_imm, mem, reg is vector decoded, while
 ;; imul $32bit_imm, reg, reg is direct decoded.
 (define_peephole2
-  [(match_scratch:DI 3 "r")
-   (parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (mult:DI (match_operand:DI 1 "memory_operand" "")
-			    (match_operand:DI 2 "immediate_operand" "")))
+  [(match_scratch:SWI48 3 "r")
+   (parallel [(set (match_operand:SWI48 0 "register_operand" "")
+		   (mult:SWI48 (match_operand:SWI48 1 "memory_operand" "")
+			       (match_operand:SWI48 2 "immediate_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
-   (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-"")
-
-(define_peephole2
-  [(match_scratch:SI 3 "r")
-   (parallel [(set (match_operand:SI 0 "register_operand" "")
-		   (mult:SI (match_operand:SI 1 "memory_operand" "")
-			    (match_operand:SI 2 "immediate_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
-   && !satisfies_constraint_K (operands[2])"
-  [(set (match_dup 3) (match_dup 1))
-   (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-"")
+   (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])])
 
 (define_peephole2
   [(match_scratch:SI 3 "r")
@@ -19989,58 +17425,29 @@
 		     (mult:SI (match_operand:SI 1 "memory_operand" "")
 			      (match_operand:SI 2 "immediate_operand" ""))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+  "TARGET_64BIT
+   && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
-   (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
-	      (clobber (reg:CC FLAGS_REG))])]
-"")
+   (parallel [(set (match_dup 0)
+		   (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])])
 
 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
 ;; Convert it into imul reg, reg
 ;; It would be better to force assembler to encode instruction using long
 ;; immediate, but there is apparently no way to do so.
 (define_peephole2
-  [(parallel [(set (match_operand:DI 0 "register_operand" "")
-		   (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
-			    (match_operand:DI 2 "const_int_operand" "")))
+  [(parallel [(set (match_operand:SWI248 0 "register_operand" "")
+		   (mult:SWI248
+		    (match_operand:SWI248 1 "nonimmediate_operand" "")
+		    (match_operand:SWI248 2 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
-   (match_scratch:DI 3 "r")]
+   (match_scratch:SWI248 3 "r")]
   "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
    && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
-   (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
-	      (clobber (reg:CC FLAGS_REG))])]
-{
-  if (!rtx_equal_p (operands[0], operands[1]))
-    emit_move_insn (operands[0], operands[1]);
-})
-
-(define_peephole2
-  [(parallel [(set (match_operand:SI 0 "register_operand" "")
-		   (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
-			    (match_operand:SI 2 "const_int_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_scratch:SI 3 "r")]
-  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
-   && satisfies_constraint_K (operands[2])"
-  [(set (match_dup 3) (match_dup 2))
-   (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
-	      (clobber (reg:CC FLAGS_REG))])]
-{
-  if (!rtx_equal_p (operands[0], operands[1]))
-    emit_move_insn (operands[0], operands[1]);
-})
-
-(define_peephole2
-  [(parallel [(set (match_operand:HI 0 "register_operand" "")
-		   (mult:HI (match_operand:HI 1 "nonimmediate_operand" "")
-			    (match_operand:HI 2 "immediate_operand" "")))
-	      (clobber (reg:CC FLAGS_REG))])
-   (match_scratch:HI 3 "r")]
-  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()"
-  [(set (match_dup 3) (match_dup 2))
-   (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
+   (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
 	      (clobber (reg:CC FLAGS_REG))])]
 {
   if (!rtx_equal_p (operands[0], operands[1]))
@@ -20060,54 +17467,66 @@
 ;;  leal    (%edx,%eax,4), %eax
 
 (define_peephole2
-  [(parallel [(set (match_operand 0 "register_operand" "")
+  [(match_scratch:P 5 "r")
+   (parallel [(set (match_operand 0 "register_operand" "")
 		   (ashift (match_operand 1 "register_operand" "")
 			   (match_operand 2 "const_int_operand" "")))
 	       (clobber (reg:CC FLAGS_REG))])
-   (set (match_operand 3 "register_operand")
-        (match_operand 4 "x86_64_general_operand" ""))
-   (parallel [(set (match_operand 5 "register_operand" "")
-		   (plus (match_operand 6 "register_operand" "")
-			 (match_operand 7 "register_operand" "")))
+   (parallel [(set (match_operand 3 "register_operand" "")
+		   (plus (match_dup 0)
+			 (match_operand 4 "x86_64_general_operand" "")))
 		   (clobber (reg:CC FLAGS_REG))])]
-  "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
+  "IN_RANGE (INTVAL (operands[2]), 1, 3)
    /* Validate MODE for lea.  */
    && ((!TARGET_PARTIAL_REG_STALL
 	&& (GET_MODE (operands[0]) == QImode
 	    || GET_MODE (operands[0]) == HImode))
        || GET_MODE (operands[0]) == SImode
        || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+   && (rtx_equal_p (operands[0], operands[3])
+       || peep2_reg_dead_p (2, operands[0]))
    /* We reorder load and the shift.  */
-   && !rtx_equal_p (operands[1], operands[3])
-   && !reg_overlap_mentioned_p (operands[0], operands[4])
-   /* Last PLUS must consist of operand 0 and 3.  */
-   && !rtx_equal_p (operands[0], operands[3])
-   && (rtx_equal_p (operands[3], operands[6])
-       || rtx_equal_p (operands[3], operands[7]))
-   && (rtx_equal_p (operands[0], operands[6])
-       || rtx_equal_p (operands[0], operands[7]))
-   /* The intermediate operand 0 must die or be same as output.  */
-   && (rtx_equal_p (operands[0], operands[5])
-       || peep2_reg_dead_p (3, operands[0]))"
-  [(set (match_dup 3) (match_dup 4))
+   && !reg_overlap_mentioned_p (operands[0], operands[4])"
+  [(set (match_dup 5) (match_dup 4))
    (set (match_dup 0) (match_dup 1))]
 {
-  enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+  enum machine_mode op1mode = GET_MODE (operands[1]);
+  enum machine_mode mode = op1mode == DImode ? DImode : SImode;
   int scale = 1 << INTVAL (operands[2]);
   rtx index = gen_lowpart (Pmode, operands[1]);
-  rtx base = gen_lowpart (Pmode, operands[3]);
-  rtx dest = gen_lowpart (mode, operands[5]);
+  rtx base = gen_lowpart (Pmode, operands[5]);
+  rtx dest = gen_lowpart (mode, operands[3]);
 
   operands[1] = gen_rtx_PLUS (Pmode, base,
   			      gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+  operands[5] = base;
   if (mode != Pmode)
     operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+  if (op1mode != Pmode)
+    operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0);
   operands[0] = dest;
 })
 
 ;; Call-value patterns last so that the wildcard operand does not
 ;; disrupt insn-recog's switch tables.
 
+(define_insn_and_split "*call_value_pop_0_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+		(match_operand:SI 2 "" "")))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 3 "immediate_operand" "")))])
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_pop_0"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@@ -20116,12 +17535,24 @@
 	(plus:SI (reg:SI SP_REG)
 		 (match_operand:SI 3 "immediate_operand" "")))]
   "!TARGET_64BIT"
-{
-  if (SIBLING_CALL_P (insn))
-    return "jmp\t%P1";
-  else
-    return "call\t%P1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_pop_1_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+		(match_operand:SI 2 "" "")))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 3 "immediate_operand" "i")))])
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_pop_1"
@@ -20132,11 +17563,24 @@
 	(plus:SI (reg:SI SP_REG)
 		 (match_operand:SI 3 "immediate_operand" "i")))]
   "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (constant_call_address_operand (operands[1], Pmode))
-    return "call\t%P1";
-  return "call\t%A1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
+ [(parallel
+   [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+		(match_operand:SI 2 "" "")))
+     (set (reg:SI SP_REG)
+	  (plus:SI (reg:SI SP_REG)
+		   (match_operand:SI 3 "immediate_operand" "i,i")))])
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*sibcall_value_pop_1"
@@ -20147,9 +17591,20 @@
 	(plus:SI (reg:SI SP_REG)
 		 (match_operand:SI 3 "immediate_operand" "i,i")))]
   "!TARGET_64BIT && SIBLING_CALL_P (insn)"
-  "@
-   jmp\t%P1
-   jmp\t%A1"
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_0"
@@ -20157,12 +17612,20 @@
 	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
 	      (match_operand:SI 2 "" "")))]
   "!TARGET_64BIT"
-{
-  if (SIBLING_CALL_P (insn))
-    return "jmp\t%P1";
-  else
-    return "call\t%P1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+	      (match_operand:DI 2 "const_int_operand" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_0_rex64"
@@ -20170,12 +17633,34 @@
 	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
 	      (match_operand:DI 2 "const_int_operand" "")))]
   "TARGET_64BIT"
-{
-  if (SIBLING_CALL_P (insn))
-    return "jmp\t%P1";
-  else
-    return "call\t%P1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+		(match_operand:DI 2 "const_int_operand" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_0_rex64_ms_sysv"
@@ -20196,12 +17681,20 @@
    (clobber (reg:DI SI_REG))
    (clobber (reg:DI DI_REG))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (SIBLING_CALL_P (insn))
-    return "jmp\t%P1";
-  else
-    return "call\t%P1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_1"
@@ -20209,11 +17702,20 @@
 	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
 	      (match_operand:SI 2 "" "")))]
   "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (constant_call_address_operand (operands[1], Pmode))
-    return "call\t%P1";
-  return "call\t%A1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*sibcall_value_1"
@@ -20221,9 +17723,21 @@
 	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
 	      (match_operand:SI 2 "" "")))]
   "!TARGET_64BIT && SIBLING_CALL_P (insn)"
-  "@
-   jmp\t%P1
-   jmp\t%A1"
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_1_rex64"
@@ -20232,11 +17746,34 @@
 	      (match_operand:DI 2 "" "")))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)
    && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
-{
-  if (constant_call_address_operand (operands[1], Pmode))
-    return "call\t%P1";
-  return "call\t%A1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+		(match_operand:DI 2 "" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_1_rex64_ms_sysv"
@@ -20257,11 +17794,20 @@
    (clobber (reg:DI SI_REG))
    (clobber (reg:DI DI_REG))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)"
-{
-  if (constant_call_address_operand (operands[1], Pmode))
-    return "call\t%P1";
-  return "call\t%A1";
-}
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*call_value_1_rex64_large"
@@ -20269,7 +17815,20 @@
 	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
 	      (match_operand:DI 2 "" "")))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)"
-  "call\t%A1"
+  { return ix86_output_call_insn (insn, operands[1], 1); }
+  [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
   [(set_attr "type" "callv")])
 
 (define_insn "*sibcall_value_1_rex64"
@@ -20277,9 +17836,7 @@
 	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
 	      (match_operand:DI 2 "" "")))]
   "TARGET_64BIT && SIBLING_CALL_P (insn)"
-  "@
-   jmp\t%P1
-   jmp\t%A1"
+  { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
@@ -20293,74 +17850,6 @@
   { return ASM_SHORT "0x0b0f"; }
   [(set_attr "length" "2")])
 
-(define_expand "sse_prologue_save"
-  [(parallel [(set (match_operand:BLK 0 "" "")
-		   (unspec:BLK [(reg:DI XMM0_REG)
-				(reg:DI XMM1_REG)
-				(reg:DI XMM2_REG)
-				(reg:DI XMM3_REG)
-				(reg:DI XMM4_REG)
-				(reg:DI XMM5_REG)
-				(reg:DI XMM6_REG)
-				(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-	      (use (match_operand:DI 1 "register_operand" ""))
-	      (use (match_operand:DI 2 "immediate_operand" ""))
-	      (use (label_ref:DI (match_operand 3 "" "")))])]
-  "TARGET_64BIT"
-  "")
-
-(define_insn "*sse_prologue_save_insn"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-			  (match_operand:DI 4 "const_int_operand" "n")))
-	(unspec:BLK [(reg:DI XMM0_REG)
-		     (reg:DI XMM1_REG)
-		     (reg:DI XMM2_REG)
-		     (reg:DI XMM3_REG)
-		     (reg:DI XMM4_REG)
-		     (reg:DI XMM5_REG)
-		     (reg:DI XMM6_REG)
-		     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-   (use (match_operand:DI 1 "register_operand" "r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))]
-  "TARGET_64BIT
-   && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-{
-  int i;
-  operands[0] = gen_rtx_MEM (Pmode,
-			     gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  /* VEX instruction with a REX prefix will #UD.  */
-  if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
-    gcc_unreachable ();
-
-  output_asm_insn ("jmp\t%A1", operands);
-  for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
-    {
-      operands[4] = adjust_address (operands[0], DImode, i*16);
-      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
-      PUT_MODE (operands[4], TImode);
-      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn ("rex", operands);
-      output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
-    }
-  (*targetm.asm_out.internal_label) (asm_out_file, "L",
-				     CODE_LABEL_NUMBER (operands[3]));
-  return "";
-}
-  [(set_attr "type" "other")
-   (set_attr "length_immediate" "0")
-   (set_attr "length_address" "0")
-   (set (attr "length")
-     (if_then_else
-       (eq (symbol_ref "TARGET_AVX") (const_int 0))
-       (const_string "34")
-       (const_string "42")))
-   (set_attr "memory" "store")
-   (set_attr "modrm" "0")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "DI")])
-
 (define_expand "prefetch"
   [(prefetch (match_operand 0 "address_operand" "")
 	     (match_operand:SI 1 "const_int_operand" "")
@@ -20385,31 +17874,11 @@
     operands[1] = const0_rtx;
 })
 
-(define_insn "*prefetch_sse"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
+(define_insn "*prefetch_sse_<mode>"
+  [(prefetch (match_operand:P 0 "address_operand" "p")
 	     (const_int 0)
 	     (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
-{
-  static const char * const patterns[4] = {
-   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
-  };
-
-  int locality = INTVAL (operands[1]);
-  gcc_assert (locality >= 0 && locality <= 3);
-
-  return patterns[locality];
-}
-  [(set_attr "type" "sse")
-   (set_attr "atom_sse_attr" "prefetch")
-   (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])"))
-   (set_attr "memory" "none")])
-
-(define_insn "*prefetch_sse_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-	     (const_int 0)
-	     (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && TARGET_64BIT"
+  "TARGET_PREFETCH_SSE"
 {
   static const char * const patterns[4] = {
    "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
@@ -20422,29 +17891,15 @@
 }
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "prefetch")
-   (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])"))
+   (set (attr "length_address")
+	(symbol_ref "memory_address_length (operands[0])"))
    (set_attr "memory" "none")])
 
-(define_insn "*prefetch_3dnow"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
+(define_insn "*prefetch_3dnow_<mode>"
+  [(prefetch (match_operand:P 0 "address_operand" "p")
 	     (match_operand:SI 1 "const_int_operand" "n")
 	     (const_int 3))]
-  "TARGET_3DNOW && !TARGET_64BIT"
-{
-  if (INTVAL (operands[1]) == 0)
-    return "prefetch\t%a0";
-  else
-    return "prefetchw\t%a0";
-}
-  [(set_attr "type" "mmx")
-   (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])"))
-   (set_attr "memory" "none")])
-
-(define_insn "*prefetch_3dnow_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-	     (match_operand:SI 1 "const_int_operand" "n")
-	     (const_int 3))]
-  "TARGET_3DNOW && TARGET_64BIT"
+  "TARGET_3DNOW"
 {
   if (INTVAL (operands[1]) == 0)
     return "prefetch\t%a0";
@@ -20452,7 +17907,8 @@
     return "prefetchw\t%a0";
 }
   [(set_attr "type" "mmx")
-   (set (attr "length_address") (symbol_ref "memory_address_length (operands[0])"))
+   (set (attr "length_address")
+	(symbol_ref "memory_address_length (operands[0])"))
    (set_attr "memory" "none")])
 
 (define_expand "stack_protect_set"
@@ -20460,64 +17916,40 @@
    (match_operand 1 "memory_operand" "")]
   ""
 {
+  rtx (*insn)(rtx, rtx);
+
 #ifdef TARGET_THREAD_SSP_OFFSET
-  if (TARGET_64BIT)
-    emit_insn (gen_stack_tls_protect_set_di (operands[0],
-					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
-  else
-    emit_insn (gen_stack_tls_protect_set_si (operands[0],
-					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+  insn = (TARGET_64BIT
+	  ? gen_stack_tls_protect_set_di
+	  : gen_stack_tls_protect_set_si);
 #else
-  if (TARGET_64BIT)
-    emit_insn (gen_stack_protect_set_di (operands[0], operands[1]));
-  else
-    emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+  insn = (TARGET_64BIT
+	  ? gen_stack_protect_set_di
+	  : gen_stack_protect_set_si);
 #endif
-  DONE;
-})
-
-(define_insn "stack_protect_set_si"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-	(unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
-   (set (match_scratch:SI 2 "=&r") (const_int 0))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+
+  emit_insn (insn (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "stack_protect_set_<mode>"
+  [(set (match_operand:P 0 "memory_operand" "=m")
+	(unspec:P [(match_operand:P 1 "memory_operand" "m")] UNSPEC_SP_SET))
+   (set (match_scratch:P 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
   [(set_attr "type" "multi")])
 
-(define_insn "stack_protect_set_di"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-	(unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
-   (set (match_scratch:DI 2 "=&r") (const_int 0))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
-  [(set_attr "type" "multi")])
-
-(define_insn "stack_tls_protect_set_si"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-	(unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
-   (set (match_scratch:SI 2 "=&r") (const_int 0))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
-  [(set_attr "type" "multi")])
-
-(define_insn "stack_tls_protect_set_di"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-	(unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
-   (set (match_scratch:DI 2 "=&r") (const_int 0))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  {
-     /* The kernel uses a different segment register for performance reasons; a
-        system call would not have to trash the userspace segment register,
-        which would be expensive */
-     if (ix86_cmodel != CM_KERNEL)
-        return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
-     else
-        return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
-  }
+(define_insn "stack_tls_protect_set_<mode>"
+  [(set (match_operand:P 0 "memory_operand" "=m")
+	(unspec:P [(match_operand:P 1 "const_int_operand" "i")]
+		  UNSPEC_SP_TLS_SET))
+   (set (match_scratch:P 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
   [(set_attr "type" "multi")])
 
 (define_expand "stack_protect_test"
@@ -20528,71 +17960,44 @@
 {
   rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
 
+  rtx (*insn)(rtx, rtx, rtx);
+
 #ifdef TARGET_THREAD_SSP_OFFSET
-  if (TARGET_64BIT)
-    emit_insn (gen_stack_tls_protect_test_di (flags, operands[0],
-					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
-  else
-    emit_insn (gen_stack_tls_protect_test_si (flags, operands[0],
-					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+  insn = (TARGET_64BIT
+	  ? gen_stack_tls_protect_test_di
+	  : gen_stack_tls_protect_test_si);
 #else
-  if (TARGET_64BIT)
-    emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1]));
-  else
-    emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1]));
+  insn = (TARGET_64BIT
+	  ? gen_stack_protect_test_di
+	  : gen_stack_protect_test_si);
 #endif
 
+  emit_insn (insn (flags, operands[0], operands[1]));
+
   emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
 				  flags, const0_rtx, operands[2]));
   DONE;
 })
 
-(define_insn "stack_protect_test_si"
-  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
-	(unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
-		     (match_operand:SI 2 "memory_operand" "m")]
-		    UNSPEC_SP_TEST))
-   (clobber (match_scratch:SI 3 "=&r"))]
-  ""
-  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}"
-  [(set_attr "type" "multi")])
-
-(define_insn "stack_protect_test_di"
-  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
-	(unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
-		     (match_operand:DI 2 "memory_operand" "m")]
-		    UNSPEC_SP_TEST))
-   (clobber (match_scratch:DI 3 "=&r"))]
-  "TARGET_64BIT"
-  "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}"
-  [(set_attr "type" "multi")])
-
-(define_insn "stack_tls_protect_test_si"
+(define_insn "stack_protect_test_<mode>"
   [(set (match_operand:CCZ 0 "flags_reg_operand" "")
-	(unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
-		     (match_operand:SI 2 "const_int_operand" "i")]
-		    UNSPEC_SP_TLS_TEST))
-   (clobber (match_scratch:SI 3 "=r"))]
-  ""
-  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}"
+	(unspec:CCZ [(match_operand:P 1 "memory_operand" "m")
+		     (match_operand:P 2 "memory_operand" "m")]
+		    UNSPEC_SP_TEST))
+   (clobber (match_scratch:P 3 "=&r"))]
+  ""
+  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}"
   [(set_attr "type" "multi")])
 
-(define_insn "stack_tls_protect_test_di"
+(define_insn "stack_tls_protect_test_<mode>"
   [(set (match_operand:CCZ 0 "flags_reg_operand" "")
-	(unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
-		     (match_operand:DI 2 "const_int_operand" "i")]
+	(unspec:CCZ [(match_operand:P 1 "memory_operand" "m")
+		     (match_operand:P 2 "const_int_operand" "i")]
 		    UNSPEC_SP_TLS_TEST))
-   (clobber (match_scratch:DI 3 "=r"))]
-  "TARGET_64BIT"
-  {
-     /* The kernel uses a different segment register for performance reasons; a
-        system call would not have to trash the userspace segment register,
-        which would be expensive */
-     if (ix86_cmodel != CM_KERNEL)
-        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}";
-     else
-        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}";
-  }
+   (clobber (match_scratch:P 3 "=r"))]
+  ""
+  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}"
   [(set_attr "type" "multi")])
 
 (define_insn "sse4_2_crc32<mode>"
@@ -20801,8 +18206,7 @@
 (define_expand "lwp_llwpcb"
   [(unspec_volatile [(match_operand 0 "register_operand" "r")]
 		    UNSPECV_LLWP_INTRINSIC)]
-  "TARGET_LWP"
-  "")
+  "TARGET_LWP")
 
 (define_insn "*lwp_llwpcb<mode>1"
   [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
@@ -20817,13 +18221,13 @@
   [(set (match_operand 0 "register_operand" "=r")
 	(unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
   "TARGET_LWP"
-  {
-    if (TARGET_64BIT)
-      emit_insn (gen_lwp_slwpcbdi (operands[0]));
-    else
-      emit_insn (gen_lwp_slwpcbsi (operands[0]));
-    DONE;
-  })
+{
+  if (TARGET_64BIT)
+    emit_insn (gen_lwp_slwpcbdi (operands[0]));
+  else
+    emit_insn (gen_lwp_slwpcbsi (operands[0]));
+  DONE;
+})
 
 (define_insn "lwp_slwpcb<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
@@ -20863,8 +18267,7 @@
 			     UNSPECV_LWPINS_INTRINSIC))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
 	(eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
-  "TARGET_LWP"
-  "")
+  "TARGET_LWP")
 
 (define_insn "*lwp_lwpins<mode>3_1"
   [(set (reg:CCC FLAGS_REG)
@@ -20879,6 +18282,48 @@
    (set (attr "length")
         (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
 
+(define_insn "rdfsbase<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDFSBASE))]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "rdfsbase %0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+(define_insn "rdgsbase<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDGSBASE))]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "rdgsbase %0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+(define_insn "wrfsbase<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+		    UNSPECV_WRFSBASE)]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "wrfsbase %0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+(define_insn "wrgsbase<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+		    UNSPECV_WRGSBASE)]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "wrgsbase %0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+(define_insn "rdrand<mode>_1"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(unspec:SWI248 [(const_int 0)] UNSPEC_RDRAND))
+   (set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(const_int 0)] UNSPEC_RDRAND))]
+  "TARGET_RDRND"
+  "rdrand\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "1")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")