diff gcc/config/i386/i386.md @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 58ad6c70ea60
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/config/i386/i386.md	Fri Jul 17 14:47:48 2009 +0900
@@ -0,0 +1,21975 @@
+;; GCC machine description for IA-32 and x86-64.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;; Mostly by William Schelter.
+;; x86_64 support added by Jan Hubicka
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+;;
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+;;
+;; The special asm output single-letter directives following a '%' are:
+;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of
+;;     operands[1].
+;; 'L' Print the opcode suffix for a 32-bit integer opcode.
+;; 'W' Print the opcode suffix for a 16-bit integer opcode.
+;; 'B' Print the opcode suffix for an 8-bit integer opcode.
+;; 'Q' Print the opcode suffix for a 64-bit float opcode.
+;; 'S' Print the opcode suffix for a 32-bit float opcode.
+;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode.
+;; 'J' Print the appropriate jump operand.
+;;
+;; 'b' Print the QImode name of the register for the indicated operand.
+;;     %b0 would print %al if operands[0] is reg 0.
+;; 'w' Likewise, print the HImode name of the register.
+;; 'k' Likewise, print the SImode name of the register.
+;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh.
+;; 'y' Print "st(0)" instead of "st" as a register.
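+;;
+;; In the output templates below, a brace group selects the assembler
+;; dialect: "mov{l}\t{%1, %0|%0, %1}" emits "movl\t%1, %0" under AT&T
+;; syntax (the text before each '|') and "mov\t%0, %1" under Intel
+;; syntax (the text after it); the choice follows ASSEMBLER_DIALECT.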
+
+;; UNSPEC usage:
+
+(define_constants
+  [; Relocation specifiers
+   (UNSPEC_GOT			0)
+   (UNSPEC_GOTOFF		1)
+   (UNSPEC_GOTPCREL		2)
+   (UNSPEC_GOTTPOFF		3)
+   (UNSPEC_TPOFF		4)
+   (UNSPEC_NTPOFF		5)
+   (UNSPEC_DTPOFF		6)
+   (UNSPEC_GOTNTPOFF		7)
+   (UNSPEC_INDNTPOFF		8)
+   (UNSPEC_PLTOFF		9)
+   (UNSPEC_MACHOPIC_OFFSET	10)
+
+   ; Prologue support
+   (UNSPEC_STACK_ALLOC		11)
+   (UNSPEC_SET_GOT		12)
+   (UNSPEC_SSE_PROLOGUE_SAVE	13)
+   (UNSPEC_REG_SAVE		14)
+   (UNSPEC_DEF_CFA		15)
+   (UNSPEC_SET_RIP		16)
+   (UNSPEC_SET_GOT_OFFSET	17)
+   (UNSPEC_MEMORY_BLOCKAGE	18)
+
+   ; TLS support
+   (UNSPEC_TP			20)
+   (UNSPEC_TLS_GD		21)
+   (UNSPEC_TLS_LD_BASE		22)
+   (UNSPEC_TLSDESC		23)
+
+   ; Other random patterns
+   (UNSPEC_SCAS			30)
+   (UNSPEC_FNSTSW		31)
+   (UNSPEC_SAHF			32)
+   (UNSPEC_FSTCW		33)
+   (UNSPEC_ADD_CARRY		34)
+   (UNSPEC_FLDCW		35)
+   (UNSPEC_REP			36)
+   (UNSPEC_EH_RETURN		37)
+   (UNSPEC_LD_MPIC		38)	; load_macho_picbase
+   (UNSPEC_TRUNC_NOOP		39)
+
+   ; For SSE/MMX support:
+   (UNSPEC_FIX_NOTRUNC		40)
+   (UNSPEC_MASKMOV		41)
+   (UNSPEC_MOVMSK		42)
+   (UNSPEC_MOVNT		43)
+   (UNSPEC_MOVU			44)
+   (UNSPEC_RCP			45)
+   (UNSPEC_RSQRT		46)
+   (UNSPEC_SFENCE		47)
+   (UNSPEC_PFRCP		49)
+   (UNSPEC_PFRCPIT1		40)
+   (UNSPEC_PFRCPIT2		41)
+   (UNSPEC_PFRSQRT		42)
+   (UNSPEC_PFRSQIT1		43)
+   (UNSPEC_MFENCE		44)
+   (UNSPEC_LFENCE		45)
+   (UNSPEC_PSADBW		46)
+   (UNSPEC_LDDQU		47)
+   (UNSPEC_MS_TO_SYSV_CALL	48)
+
+   ; Generic math support
+   (UNSPEC_COPYSIGN		50)
+   (UNSPEC_IEEE_MIN		51)	; not commutative
+   (UNSPEC_IEEE_MAX		52)	; not commutative
+
+   ; x87 Floating point
+   (UNSPEC_SIN			60)
+   (UNSPEC_COS			61)
+   (UNSPEC_FPATAN		62)
+   (UNSPEC_FYL2X		63)
+   (UNSPEC_FYL2XP1		64)
+   (UNSPEC_FRNDINT		65)
+   (UNSPEC_FIST			66)
+   (UNSPEC_F2XM1		67)
+   (UNSPEC_TAN			68)
+   (UNSPEC_FXAM			69)
+
+   ; x87 Rounding
+   (UNSPEC_FRNDINT_FLOOR	70)
+   (UNSPEC_FRNDINT_CEIL 	71)
+   (UNSPEC_FRNDINT_TRUNC	72)
+   (UNSPEC_FRNDINT_MASK_PM	73)
+   (UNSPEC_FIST_FLOOR		74)
+   (UNSPEC_FIST_CEIL 		75)
+
+   ; x87 Double output FP
+   (UNSPEC_SINCOS_COS		80)
+   (UNSPEC_SINCOS_SIN		81)
+   (UNSPEC_XTRACT_FRACT		84)
+   (UNSPEC_XTRACT_EXP		85)
+   (UNSPEC_FSCALE_FRACT		86)
+   (UNSPEC_FSCALE_EXP		87)
+   (UNSPEC_FPREM_F		88)
+   (UNSPEC_FPREM_U		89)
+   (UNSPEC_FPREM1_F		90)
+   (UNSPEC_FPREM1_U		91)
+
+   (UNSPEC_C2_FLAG		95)
+   (UNSPEC_FXAM_MEM		96)
+
+   ; SSP patterns
+   (UNSPEC_SP_SET		100)
+   (UNSPEC_SP_TEST		101)
+   (UNSPEC_SP_TLS_SET		102)
+   (UNSPEC_SP_TLS_TEST		103)
+
+   ; SSSE3
+   (UNSPEC_PSHUFB		120)
+   (UNSPEC_PSIGN		121)
+   (UNSPEC_PALIGNR		122)
+
+   ; For SSE4A support
+   (UNSPEC_EXTRQI               130)
+   (UNSPEC_EXTRQ                131)
+   (UNSPEC_INSERTQI             132)
+   (UNSPEC_INSERTQ              133)
+
+   ; For SSE4.1 support
+   (UNSPEC_BLENDV		134)
+   (UNSPEC_INSERTPS		135)
+   (UNSPEC_DP			136)
+   (UNSPEC_MOVNTDQA		137)
+   (UNSPEC_MPSADBW		138)
+   (UNSPEC_PHMINPOSUW		139)
+   (UNSPEC_PTEST		140)
+   (UNSPEC_ROUND		141)
+
+   ; For SSE4.2 support
+   (UNSPEC_CRC32		143)
+   (UNSPEC_PCMPESTR		144)
+   (UNSPEC_PCMPISTR		145)
+
+   ;; For SSE5
+   (UNSPEC_SSE5_INTRINSIC	150)
+   (UNSPEC_SSE5_UNSIGNED_CMP	151)
+   (UNSPEC_SSE5_TRUEFALSE	152)
+   (UNSPEC_SSE5_PERMUTE		153)
+   (UNSPEC_FRCZ			154)
+   (UNSPEC_CVTPH2PS		155)
+   (UNSPEC_CVTPS2PH		156)
+
+   ; For AES support
+   (UNSPEC_AESENC		159)
+   (UNSPEC_AESENCLAST		160)
+   (UNSPEC_AESDEC		161)
+   (UNSPEC_AESDECLAST		162)
+   (UNSPEC_AESIMC		163)
+   (UNSPEC_AESKEYGENASSIST	164)
+
+   ; For PCLMUL support
+   (UNSPEC_PCLMUL		165)
+
+   ; For AVX support
+   (UNSPEC_PCMP			166)
+   (UNSPEC_VPERMIL		167)
+   (UNSPEC_VPERMIL2F128		168)
+   (UNSPEC_MASKLOAD		169)
+   (UNSPEC_MASKSTORE		170)
+   (UNSPEC_CAST			171)
+   (UNSPEC_VTESTP		172)
+  ])
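+
+;; As an illustration of how these constants are used, the
+;; "x86_fnstsw_1" pattern later in this file wraps the status word read
+;; in an unspec so that no other pattern can accidentally match it:
+;;
+;;   (set (match_operand:HI 0 "register_operand" "=a")
+;;        (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))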
+
+(define_constants
+  [(UNSPECV_BLOCKAGE		0)
+   (UNSPECV_STACK_PROBE		1)
+   (UNSPECV_EMMS		2)
+   (UNSPECV_LDMXCSR		3)
+   (UNSPECV_STMXCSR		4)
+   (UNSPECV_FEMMS		5)
+   (UNSPECV_CLFLUSH		6)
+   (UNSPECV_ALIGN		7)
+   (UNSPECV_MONITOR		8)
+   (UNSPECV_MWAIT		9)
+   (UNSPECV_CMPXCHG		10)
+   (UNSPECV_XCHG		12)
+   (UNSPECV_LOCK		13)
+   (UNSPECV_PROLOGUE_USE	14)
+   (UNSPECV_CLD			15)
+   (UNSPECV_VZEROALL		16)
+   (UNSPECV_VZEROUPPER		17)
+  ])
+
+;; Constants to represent pcomtrue/pcomfalse variants
+(define_constants
+  [(PCOM_FALSE			0)
+   (PCOM_TRUE			1)
+   (COM_FALSE_S			2)
+   (COM_FALSE_P			3)
+   (COM_TRUE_S			4)
+   (COM_TRUE_P			5)
+  ])
+
+;; Constants used in the SSE5 pperm instruction
+(define_constants
+  [(PPERM_SRC			0x00)	/* copy source */
+   (PPERM_INVERT		0x20)	/* invert source */
+   (PPERM_REVERSE		0x40)	/* bit reverse source */
+   (PPERM_REV_INV		0x60)	/* bit reverse & invert src */
+   (PPERM_ZERO			0x80)	/* all 0's */
+   (PPERM_ONES			0xa0)	/* all 1's */
+   (PPERM_SIGN			0xc0)	/* propagate sign bit */
+   (PPERM_INV_SIGN		0xe0)	/* invert & propagate sign */
+   (PPERM_SRC1			0x00)	/* use first source byte */
+   (PPERM_SRC2			0x10)	/* use second source byte */
+   ])
+
+;; Registers by name.
+(define_constants
+  [(AX_REG			 0)
+   (DX_REG			 1)
+   (CX_REG			 2)
+   (BX_REG			 3)
+   (SI_REG			 4)
+   (DI_REG			 5)
+   (BP_REG			 6)
+   (SP_REG			 7)
+   (ST0_REG			 8)
+   (ST1_REG			 9)
+   (ST2_REG			10)
+   (ST3_REG			11)
+   (ST4_REG			12)
+   (ST5_REG			13)
+   (ST6_REG			14)
+   (ST7_REG			15)
+   (FLAGS_REG			17)
+   (FPSR_REG			18)
+   (FPCR_REG			19)
+   (XMM0_REG			21)
+   (XMM1_REG			22)
+   (XMM2_REG			23)
+   (XMM3_REG			24)
+   (XMM4_REG			25)
+   (XMM5_REG			26)
+   (XMM6_REG			27)
+   (XMM7_REG			28)
+   (MM0_REG			29)
+   (MM1_REG			30)
+   (MM2_REG			31)
+   (MM3_REG			32)
+   (MM4_REG			33)
+   (MM5_REG			34)
+   (MM6_REG			35)
+   (MM7_REG			36)
+   (R8_REG			37)
+   (R9_REG			38)
+   (R10_REG			39)
+   (R11_REG			40)
+   (R13_REG			42)
+   (XMM8_REG			45)
+   (XMM9_REG			46)
+   (XMM10_REG			47)
+   (XMM11_REG			48)
+   (XMM12_REG			49)
+   (XMM13_REG			50)
+   (XMM14_REG			51)
+   (XMM15_REG			52)
+  ])
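+
+;; These names let patterns refer to hard registers directly; for
+;; example, the compare patterns below use (reg:CC FLAGS_REG) and the
+;; pop patterns use (reg:SI SP_REG).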
+
+;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
+;; from i386.c.
+
+;; In C guard expressions, put expressions which may be compile-time
+;; constants first.  This allows for better optimization.  For
+;; example, write "TARGET_64BIT && reload_completed", not
+;; "reload_completed && TARGET_64BIT".
+
+
+;; Processor type.
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
+		    generic64,amdfam10"
+  (const (symbol_ref "ix86_schedule")))
+
+;; A basic instruction type.  Refinements due to arguments to be
+;; provided in other attributes.
+(define_attr "type"
+  "other,multi,
+   alu,alu1,negnot,imov,imovx,lea,
+   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   icmp,test,ibr,setcc,icmov,
+   push,pop,call,callv,leave,
+   str,bitmanip,
+   fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
+   sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
+   ssemuladd,sse4arg,
+   mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
+  (const_string "other"))
+
+;; Main data type used by the insn
+(define_attr "mode"
+  "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
+  (const_string "unknown"))
+
+;; The CPU unit the operation uses.
+(define_attr "unit" "integer,i387,sse,mmx,unknown"
+  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
+	   (const_string "i387")
+	 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+			  ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
+	   (const_string "sse")
+	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+	   (const_string "mmx")
+	 (eq_attr "type" "other")
+	   (const_string "unknown")]
+	 (const_string "integer")))
+
+;; The (bounding maximum) length of an instruction immediate.
+(define_attr "length_immediate" ""
+  (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
+                          bitmanip")
+	   (const_int 0)
+	 (eq_attr "unit" "i387,sse,mmx")
+	   (const_int 0)
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
+			  imul,icmp,push,pop")
+	   (symbol_ref "ix86_attr_length_immediate_default(insn,1)")
+	 (eq_attr "type" "imov,test")
+	   (symbol_ref "ix86_attr_length_immediate_default(insn,0)")
+	 (eq_attr "type" "call")
+	   (if_then_else (match_operand 0 "constant_call_address_operand" "")
+	     (const_int 4)
+	     (const_int 0))
+	 (eq_attr "type" "callv")
+	   (if_then_else (match_operand 1 "constant_call_address_operand" "")
+	     (const_int 4)
+	     (const_int 0))
+	 ;; We don't know the size before shorten_branches.  Expect
+	 ;; the instruction to fit for better scheduling.
+	 (eq_attr "type" "ibr")
+	   (const_int 1)
+	 ]
+	 (symbol_ref "/* Update immediate_length and other attributes! */
+		      gcc_unreachable (),1")))
+
+;; The (bounding maximum) length of an instruction address.
+(define_attr "length_address" ""
+  (cond [(eq_attr "type" "str,other,multi,fxch")
+	   (const_int 0)
+	 (and (eq_attr "type" "call")
+	      (match_operand 0 "constant_call_address_operand" ""))
+	     (const_int 0)
+	 (and (eq_attr "type" "callv")
+	      (match_operand 1 "constant_call_address_operand" ""))
+	     (const_int 0)
+	 ]
+	 (symbol_ref "ix86_attr_length_address_default (insn)")))
+
+;; Set when the operand-size (data16) prefix is used.
+(define_attr "prefix_data16" ""
+  (if_then_else (ior (eq_attr "mode" "HI")
+		     (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF")))
+    (const_int 1)
+    (const_int 0)))
+
+;; Set when string REP prefix is used.
+(define_attr "prefix_rep" ""
+  (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
+    (const_int 1)
+    (const_int 0)))
+
+;; Set when 0f opcode prefix is used.
+(define_attr "prefix_0f" ""
+  (if_then_else
+    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+	 (eq_attr "unit" "sse,mmx"))
+    (const_int 1)
+    (const_int 0)))
+
+;; Set when REX opcode prefix is used.
+(define_attr "prefix_rex" ""
+  (cond [(and (eq_attr "mode" "DI")
+  	      (eq_attr "type" "!push,pop,call,callv,leave,ibr"))
+	   (const_int 1)
+	 (and (eq_attr "mode" "QI")
+	      (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)")
+		  (const_int 0)))
+	   (const_int 1)
+	 (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+	     (const_int 0))
+	   (const_int 1)
+	]
+	(const_int 0)))
+
+;; There are also additional prefixes in SSSE3.
+(define_attr "prefix_extra" "" (const_int 0))
+
+;; Prefix used: original, VEX or maybe VEX.
+(define_attr "prefix" "orig,vex,maybe_vex"
+  (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+    (const_string "vex")
+    (const_string "orig")))
+
+;; There is an 8-bit immediate for VEX.
+(define_attr "prefix_vex_imm8" "" (const_int 0))
+
+;; VEX W bit is used.
+(define_attr "prefix_vex_w" "" (const_int 0))
+
+;; The length of the VEX prefix.
+(define_attr "length_vex" ""
+  (if_then_else (eq_attr "prefix_0f" "1")
+    (if_then_else (eq_attr "prefix_vex_w" "1")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)"))
+    (if_then_else (eq_attr "prefix_vex_w" "1")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)"))))
+
+;; Set when modrm byte is used.
+(define_attr "modrm" ""
+  (cond [(eq_attr "type" "str,leave")
+	   (const_int 0)
+	 (eq_attr "unit" "i387")
+	   (const_int 0)
+         (and (eq_attr "type" "incdec")
+	      (ior (match_operand:SI 1 "register_operand" "")
+		   (match_operand:HI 1 "register_operand" "")))
+	   (const_int 0)
+	 (and (eq_attr "type" "push")
+	      (not (match_operand 1 "memory_operand" "")))
+	   (const_int 0)
+	 (and (eq_attr "type" "pop")
+	      (not (match_operand 0 "memory_operand" "")))
+	   (const_int 0)
+	 (and (eq_attr "type" "imov")
+	      (ior (and (match_operand 0 "register_operand" "")
+			(match_operand 1 "immediate_operand" ""))
+		   (ior (and (match_operand 0 "ax_reg_operand" "")
+			     (match_operand 1 "memory_displacement_only_operand" ""))
+			(and (match_operand 0 "memory_displacement_only_operand" "")
+			     (match_operand 1 "ax_reg_operand" "")))))
+	   (const_int 0)
+	 (and (eq_attr "type" "call")
+	      (match_operand 0 "constant_call_address_operand" ""))
+	     (const_int 0)
+	 (and (eq_attr "type" "callv")
+	      (match_operand 1 "constant_call_address_operand" ""))
+	     (const_int 0)
+	 ]
+	 (const_int 1)))
+
+;; The (bounding maximum) length of an instruction in bytes.
+;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
+;; Later we may want to split them and compute proper length as for
+;; other insns.
+(define_attr "length" ""
+  (cond [(eq_attr "type" "other,multi,fistp,frndint")
+	   (const_int 16)
+	 (eq_attr "type" "fcmp")
+	   (const_int 4)
+	 (eq_attr "unit" "i387")
+	   (plus (const_int 2)
+		 (plus (attr "prefix_data16")
+		       (attr "length_address")))
+	 (ior (eq_attr "prefix" "vex")
+	      (and (eq_attr "prefix" "maybe_vex")
+		    (ne (symbol_ref "TARGET_AVX") (const_int 0))))
+	   (plus (attr "length_vex")
+		 (plus (attr "prefix_vex_imm8")
+		       (plus (attr "modrm")
+			     (attr "length_address"))))]
+	 (plus (plus (attr "modrm")
+		     (plus (attr "prefix_0f")
+			   (plus (attr "prefix_rex")
+				 (plus (attr "prefix_extra")
+				       (const_int 1)))))
+	       (plus (attr "prefix_rep")
+		     (plus (attr "prefix_data16")
+			   (plus (attr "length_immediate")
+				 (attr "length_address")))))))
+
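+;; As a worked example of the default arm above (assuming
+;; ix86_attr_length_immediate_default counts a sign-extended 8-bit
+;; immediate as one byte): "add $1, %eax" is bounded by modrm (1)
+;; + prefix_0f (0) + prefix_rex (0) + prefix_extra (0) + opcode (1)
+;; + prefix_rep (0) + prefix_data16 (0) + length_immediate (1)
+;; + length_address (0) = 3 bytes, matching its actual encoding
+;; (83 c0 01).
+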
+;; The `memory' attribute is `none' if no memory is referenced, `load' or
+;; `store' if there is a simple memory reference therein, or `unknown'
+;; if the instruction is complex.
+
+(define_attr "memory" "none,load,store,both,unknown"
+  (cond [(eq_attr "type" "other,multi,str")
+	   (const_string "unknown")
+	 (eq_attr "type" "lea,fcmov,fpspc")
+	   (const_string "none")
+	 (eq_attr "type" "fistp,leave")
+	   (const_string "both")
+	 (eq_attr "type" "frndint")
+	   (const_string "load")
+	 (eq_attr "type" "push")
+	   (if_then_else (match_operand 1 "memory_operand" "")
+	     (const_string "both")
+	     (const_string "store"))
+	 (eq_attr "type" "pop")
+	   (if_then_else (match_operand 0 "memory_operand" "")
+	     (const_string "both")
+	     (const_string "load"))
+	 (eq_attr "type" "setcc")
+	   (if_then_else (match_operand 0 "memory_operand" "")
+	     (const_string "store")
+	     (const_string "none"))
+	 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
+	   (if_then_else (ior (match_operand 0 "memory_operand" "")
+			      (match_operand 1 "memory_operand" ""))
+	     (const_string "load")
+	     (const_string "none"))
+	 (eq_attr "type" "ibr")
+	   (if_then_else (match_operand 0 "memory_operand" "")
+	     (const_string "load")
+	     (const_string "none"))
+	 (eq_attr "type" "call")
+	   (if_then_else (match_operand 0 "constant_call_address_operand" "")
+	     (const_string "none")
+	     (const_string "load"))
+	 (eq_attr "type" "callv")
+	   (if_then_else (match_operand 1 "constant_call_address_operand" "")
+	     (const_string "none")
+	     (const_string "load"))
+	 (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
+	      (match_operand 1 "memory_operand" ""))
+	   (const_string "both")
+	 (and (match_operand 0 "memory_operand" "")
+	      (match_operand 1 "memory_operand" ""))
+	   (const_string "both")
+	 (match_operand 0 "memory_operand" "")
+	   (const_string "store")
+	 (match_operand 1 "memory_operand" "")
+	   (const_string "load")
+	 (and (eq_attr "type"
+		 "!alu1,negnot,ishift1,
+		   imov,imovx,icmp,test,bitmanip,
+		   fmov,fcmp,fsgn,
+		   sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
+		   sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
+	      (match_operand 2 "memory_operand" ""))
+	   (const_string "load")
+	 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
+	      (match_operand 3 "memory_operand" ""))
+	   (const_string "load")
+	]
+	(const_string "none")))
+
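+;; For example, a "cmp" (type icmp) with one memory operand is
+;; classified as "load", a push from memory as "both" (it loads the
+;; source and stores to the stack), and a register-to-register move
+;; falls through to "none".
+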
+;; Indicates if an instruction has both an immediate and a displacement.
+
+(define_attr "imm_disp" "false,true,unknown"
+  (cond [(eq_attr "type" "other,multi")
+	   (const_string "unknown")
+	 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
+	      (and (match_operand 0 "memory_displacement_operand" "")
+		   (match_operand 1 "immediate_operand" "")))
+	   (const_string "true")
+	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	      (and (match_operand 0 "memory_displacement_operand" "")
+		   (match_operand 2 "immediate_operand" "")))
+	   (const_string "true")
+	]
+	(const_string "false")))
+
+;; Indicates if an FP operation has an integer source.
+
+(define_attr "fp_int_src" "false,true"
+  (const_string "false"))
+
+;; Defines the rounding mode of an FP operation.
+
+(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+  (const_string "any"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+  [(set_attr "length" "128")
+   (set_attr "type" "multi")])
+
+;; All integer comparison codes.
+(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ])
+
+;; All floating-point comparison codes.
+(define_code_iterator fp_cond [unordered ordered
+			       uneq unge ungt unle unlt ltgt ])
+
+(define_code_iterator plusminus [plus minus])
+
+(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+
+;; Base name for define_insn
+(define_code_attr plusminus_insn
+  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+   (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminus_mnemonic
+  [(plus "add") (ss_plus "adds") (us_plus "addus")
+   (minus "sub") (ss_minus "subs") (us_minus "subus")])
+
+;; Mark commutative operators as such in constraints.
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
+			(minus "") (ss_minus "") (us_minus "")])
+
+;; Mapping of signed max and min
+(define_code_iterator smaxmin [smax smin])
+
+;; Mapping of unsigned max and min
+(define_code_iterator umaxmin [umax umin])
+
+;; Mapping of signed/unsigned max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
+;; Base name for integer and FP insn mnemonic
+(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
+				 (umax "maxu") (umin "minu")])
+(define_code_attr maxminfprefix [(smax "max") (smin "min")])
+
+;; Mapping of parallel logic operators
+(define_code_iterator plogic [and ior xor])
+
+;; Base name for insn mnemonic.
+(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of abs neg operators
+(define_code_iterator absneg [abs neg])
+
+;; Base name for x87 insn mnemonic.
+(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
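+
+;; As a sketch of how these expand: a single pattern named
+;; "<plusminus_insn><mode>3" written with the plusminus iterator is
+;; instantiated once per code, e.g. as "addsi3" and "subsi3" for
+;; SImode, with <comm> marking the additions commutative.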
+
+;; All single word integer modes.
+(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without QImode.
+(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+
+;; Instruction suffix for integer modes.
+(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Register class for integer modes.
+(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
+
+;; Immediate operand constraint for integer modes.
+(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
+
+;; General operand predicate for integer modes.
+(define_mode_attr general_operand
+	[(QI "general_operand")
+	 (HI "general_operand")
+	 (SI "general_operand")
+	 (DI "x86_64_general_operand")])
+
+;; SSE and x87 SFmode and DFmode floating point modes
+(define_mode_iterator MODEF [SF DF])
+
+;; All x87 floating point modes
+(define_mode_iterator X87MODEF [SF DF XF])
+
+;; All integer modes handled by x87 fisttp operator.
+(define_mode_iterator X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_iterator X87MODEI12 [HI SI])
+
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_iterator SSEMODEI24 [SI DI])
+
+;; SSE asm suffix for floating point modes
+(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
+
+;; SSE vector mode corresponding to a scalar mode
+(define_mode_attr ssevecmode
+  [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64-bit operators.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
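+
+;; For example, the "cmp<mode>" expander below is written once with the
+;; MODEF iterator and is instantiated as "cmpsf" and "cmpdf", with
+;; <MODE>mode in its condition replaced by SFmode and DFmode
+;; respectively.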
+
+
+;; Scheduling descriptions
+
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
+(include "geode.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare instructions.
+
+;; All compare insns have expanders that save the operands away without
+;; actually generating RTL.  The bCOND or sCOND (emitted immediately
+;; after the cmp) will actually emit the cmpM.
+
+(define_expand "cmpti"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:TI 0 "nonimmediate_operand" "")
+		    (match_operand:TI 1 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+{
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[0] = force_reg (TImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_expand "cmpdi"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+		    (match_operand:DI 1 "x86_64_general_operand" "")))]
+  ""
+{
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[0] = force_reg (DImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_expand "cmpsi"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:SI 0 "cmpsi_operand" "")
+		    (match_operand:SI 1 "general_operand" "")))]
+  ""
+{
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[0] = force_reg (SImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_expand "cmphi"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:HI 0 "nonimmediate_operand" "")
+		    (match_operand:HI 1 "general_operand" "")))]
+  ""
+{
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[0] = force_reg (HImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_expand "cmpqi"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:QI 0 "nonimmediate_operand" "")
+		    (match_operand:QI 1 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+{
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[0] = force_reg (QImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_insn "cmpdi_ccno_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr")
+		 (match_operand:DI 1 "const0_operand" "")))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   test{q}\t%0, %0
+   cmp{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test,icmp")
+   (set_attr "length_immediate" "0,1")
+   (set_attr "mode" "DI")])
+
+(define_insn "*cmpdi_minus_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r")
+			   (match_operand:DI 1 "x86_64_general_operand" "re,mr"))
+		 (const_int 0)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)"
+  "cmp{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "DI")])
+
+(define_expand "cmpdi_1_rex64"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+		    (match_operand:DI 1 "general_operand" "")))]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "cmpdi_1_insn_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:DI 0 "nonimmediate_operand" "mr,r")
+		 (match_operand:DI 1 "x86_64_general_operand" "re,mr")))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+  "cmp{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "DI")])
+
+
+(define_insn "*cmpsi_ccno_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr")
+		 (match_operand:SI 1 "const0_operand" "")))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   test{l}\t%0, %0
+   cmp{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test,icmp")
+   (set_attr "length_immediate" "0,1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*cmpsi_minus_1"
+  [(set (reg FLAGS_REG)
+	(compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+			   (match_operand:SI 1 "general_operand" "ri,mr"))
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, CCGOCmode)"
+  "cmp{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "SI")])
+
+(define_expand "cmpsi_1"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:SI 0 "nonimmediate_operand" "")
+		    (match_operand:SI 1 "general_operand" "")))]
+  ""
+  "")
+
+(define_insn "*cmpsi_1_insn"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+		 (match_operand:SI 1 "general_operand" "ri,mr")))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+    && ix86_match_ccmode (insn, CCmode)"
+  "cmp{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "SI")])
+
+(define_insn "*cmphi_ccno_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr")
+		 (match_operand:HI 1 "const0_operand" "")))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   test{w}\t%0, %0
+   cmp{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test,icmp")
+   (set_attr "length_immediate" "0,1")
+   (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_minus_1"
+  [(set (reg FLAGS_REG)
+	(compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+			   (match_operand:HI 1 "general_operand" "rn,mr"))
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, CCGOCmode)"
+  "cmp{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+		 (match_operand:HI 1 "general_operand" "rn,mr")))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && ix86_match_ccmode (insn, CCmode)"
+  "cmp{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "HI")])
+
+(define_insn "*cmpqi_ccno_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq")
+		 (match_operand:QI 1 "const0_operand" "")))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "@
+   test{b}\t%0, %0
+   cmp{b}\t{$0, %0|%0, 0}"
+  [(set_attr "type" "test,icmp")
+   (set_attr "length_immediate" "0,1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_1"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+		 (match_operand:QI 1 "general_operand" "qn,mq")))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+    && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_minus_1"
+  [(set (reg FLAGS_REG)
+	(compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+			   (match_operand:QI 1 "general_operand" "qn,mq"))
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, CCGOCmode)"
+  "cmp{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (match_operand:QI 0 "general_operand" "Qm")
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)))]
+  "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (match_operand:QI 0 "register_operand" "Q")
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)
+	  (match_operand:QI 1 "const0_operand" "")))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "test{b}\t%h0, %h0"
+  [(set_attr "type" "test")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_expand "cmpqi_ext_3"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "")
+	      (const_int 8)
+	      (const_int 8)) 0)
+	  (match_operand:QI 1 "general_operand" "")))]
+  ""
+  "")
+
+(define_insn "cmpqi_ext_3_insn"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)
+	  (match_operand:QI 1 "general_operand" "Qmn")))]
+  "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "cmpqi_ext_3_insn_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)
+	  (match_operand:QI 1 "nonmemory_operand" "Qn")))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_4"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)
+	  (subreg:QI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)) 0)))]
+  "ix86_match_ccmode (insn, CCmode)"
+  "cmp{b}\t{%h1, %h0|%h0, %h1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "QI")])
+
+;; These implement floating point compares.
+;; %%% See if we can get away with VOIDmode operands on the actual insns,
+;; which would allow mixing and matching FP modes on the compares.  That
+;; is what the old patterns did, but with many more of them.
+
+(define_expand "cmpxf"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:XF 0 "nonmemory_operand" "")
+		    (match_operand:XF 1 "nonmemory_operand" "")))]
+  "TARGET_80387"
+{
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+(define_expand "cmp<mode>"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "")
+		    (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
+;; FP compares, step 1:
+;; Set the FP condition codes.
+;;
+;; CCFPmode	compare with exceptions
+;; CCFPUmode	compare with no exceptions
+
+;; We may not use "#" to split and emit these, since the REG_DEAD notes
+;; used to manage the reg stack popping would not be preserved.
+
+(define_insn "*cmpfp_0"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(compare:CCFP
+	     (match_operand 1 "register_operand" "f")
+	     (match_operand 2 "const0_operand" ""))]
+	UNSPEC_FNSTSW))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "* return output_fp_compare (insn, operands, 0, 0);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_0_cc"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
+	  (match_operand 1 "register_operand" "f")
+	  (match_operand 2 "const0_operand" "")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:HI
+	  [(compare:CCFP (match_dup 1)(match_dup 2))]
+	UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))])
+
+(define_insn "*cmpfp_xf"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(compare:CCFP
+	     (match_operand:XF 1 "register_operand" "f")
+	     (match_operand:XF 2 "register_operand" "f"))]
+	  UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "* return output_fp_compare (insn, operands, 0, 0);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "XF")])
+
+(define_insn_and_split "*cmpfp_xf_cc"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
+	  (match_operand:XF 1 "register_operand" "f")
+	  (match_operand:XF 2 "register_operand" "f")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "TARGET_80387
+   && TARGET_SAHF && !TARGET_CMOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:HI
+	  [(compare:CCFP (match_dup 1)(match_dup 2))]
+	UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cmpfp_<mode>"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(compare:CCFP
+	     (match_operand:MODEF 1 "register_operand" "f")
+	     (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
+	  UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "* return output_fp_compare (insn, operands, 0, 0);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
+	  (match_operand:MODEF 1 "register_operand" "f")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "TARGET_80387
+   && TARGET_SAHF && !TARGET_CMOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:HI
+	  [(compare:CCFP (match_dup 1)(match_dup 2))]
+	UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmpfp_u"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(compare:CCFPU
+	     (match_operand 1 "register_operand" "f")
+	     (match_operand 2 "register_operand" "f"))]
+	  UNSPEC_FNSTSW))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "* return output_fp_compare (insn, operands, 0, 1);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_u_cc"
+  [(set (reg:CCFPU FLAGS_REG)
+	(compare:CCFPU
+	  (match_operand 1 "register_operand" "f")
+	  (match_operand 2 "register_operand" "f")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:HI
+	  [(compare:CCFPU (match_dup 1)(match_dup 2))]
+	UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))])
+
+(define_insn "*cmpfp_<mode>"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(compare:CCFP
+	     (match_operand 1 "register_operand" "f")
+	     (match_operator 3 "float_operator"
+	       [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
+	  UNSPEC_FNSTSW))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+   && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+  "* return output_fp_compare (insn, operands, 0, 0);"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP
+	  (match_operand 1 "register_operand" "f")
+	  (match_operator 3 "float_operator"
+	    [(match_operand:X87MODEI12 2 "memory_operand" "m")])))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+   && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:HI
+	  [(compare:CCFP
+	     (match_dup 1)
+	     (match_op_dup 3 [(match_dup 2)]))]
+	UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2
+;; Move the fpsw to ax.
+
+(define_insn "x86_fnstsw_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "SI")
+   (set_attr "unit" "i387")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+
+(define_insn "x86_sahf_1"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:HI 0 "register_operand" "a")]
+		   UNSPEC_SAHF))]
+  "TARGET_SAHF"
+{
+#ifdef HAVE_AS_IX86_SAHF
+  return "sahf";
+#else
+  return ".byte\t0x9e";
+#endif
+}
+  [(set_attr "length" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "mode" "SI")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
+;; comi*, ucomi*, fcomi*, ficomi*, fucomi* (i387 instructions that set condition codes)
+(define_insn "*cmpfp_i_mixed"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP (match_operand 0 "register_operand" "f,x")
+		      (match_operand 1 "nonimmediate_operand" "f,xm")))]
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "fcmp,ssecomi")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set (attr "mode")
+     (if_then_else (match_operand:SF 1 "" "")
+        (const_string "SF")
+        (const_string "DF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_i_sse"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP (match_operand 0 "register_operand" "x")
+		      (match_operand 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+     (if_then_else (match_operand:SF 1 "" "")
+        (const_string "SF")
+        (const_string "DF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_i_i387"
+  [(set (reg:CCFP FLAGS_REG)
+	(compare:CCFP (match_operand 0 "register_operand" "f")
+		      (match_operand 1 "register_operand" "f")))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && TARGET_CMOVE
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "fcmp")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_mixed"
+  [(set (reg:CCFPU FLAGS_REG)
+	(compare:CCFPU (match_operand 0 "register_operand" "f,x")
+		       (match_operand 1 "nonimmediate_operand" "f,xm")))]
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 1);"
+  [(set_attr "type" "fcmp,ssecomi")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set (attr "mode")
+     (if_then_else (match_operand:SF 1 "" "")
+        (const_string "SF")
+        (const_string "DF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_sse"
+  [(set (reg:CCFPU FLAGS_REG)
+	(compare:CCFPU (match_operand 0 "register_operand" "x")
+		       (match_operand 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 1);"
+  [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+     (if_then_else (match_operand:SF 1 "" "")
+        (const_string "SF")
+        (const_string "DF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_387"
+  [(set (reg:CCFPU FLAGS_REG)
+	(compare:CCFPU (match_operand 0 "register_operand" "f")
+		       (match_operand 1 "register_operand" "f")))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && TARGET_CMOVE
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 1);"
+  [(set_attr "type" "fcmp")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+	      (const_string "SF")
+	    (match_operand:DF 1 "" "")
+	      (const_string "DF")
+	   ]
+	   (const_string "XF")))
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
+
+;; Move instructions.
+
+;; General case of fullword move.
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (SImode, operands); DONE;")
+
+;; Push/pop instructions.  They are separate since autoinc/dec is not a
+;; general_operand.
+;;
+;; %%% We don't use a post-inc memory reference because x86 is not a
+;; general AUTO_INC_DEC host, which impacts how it is treated in flow.
+;; Changing this impacts compiler performance on other non-AUTO_INC_DEC
+;; targets without our curiosities, and it is just as easy to represent
+;; this differently.
+
+(define_insn "*pushsi2"
+  [(set (match_operand:SI 0 "push_operand" "=<")
+	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+  "!TARGET_64BIT"
+  "push{l}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushsi2_rex64"
+  [(set (match_operand:SI 0 "push_operand" "=X")
+	(match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))]
+  "TARGET_64BIT"
+  "push{q}\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+(define_insn "*pushsi2_prologue"
+  [(set (match_operand:SI 0 "push_operand" "=<")
+	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))
+   (clobber (mem:BLK (scratch)))]
+  "!TARGET_64BIT"
+  "push{l}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+(define_insn "*popsi1_epilogue"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+	(mem:SI (reg:SI SP_REG)))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG) (const_int 4)))
+   (clobber (mem:BLK (scratch)))]
+  "!TARGET_64BIT"
+  "pop{l}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "SI")])
+
+(define_insn "popsi1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+	(mem:SI (reg:SI SP_REG)))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG) (const_int 4)))]
+  "!TARGET_64BIT"
+  "pop{l}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movsi_xor"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operand:SI 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{l}\t%0, %0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_or"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operand:SI 1 "immediate_operand" "i"))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && operands[1] == constm1_rtx"
+{
+  operands[1] = constm1_rtx;
+  return "or{l}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "1")])
+
+(define_insn "*movsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand"
+			"=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+	(match_operand:SI 1 "general_operand"
+			"g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r   ,m "))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_SSELOG1:
+      if (get_attr_mode (insn) == MODE_TI)
+        return "%vpxor\t%0, %d0";
+      return "%vxorps\t%0, %d0";
+
+    case TYPE_SSEMOV:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_TI:
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
+	case MODE_V4SF:
+	  return "%vmovaps\t{%1, %0|%0, %1}";
+	case MODE_SI:
+          return "%vmovd\t{%1, %0|%0, %1}";
+	case MODE_SF:
+          return "%vmovss\t{%1, %0|%0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case TYPE_MMX:
+      return "pxor\t%0, %0";
+
+    case TYPE_MMXMOV:
+      if (get_attr_mode (insn) == MODE_DI)
+	return "movq\t{%1, %0|%0, %1}";
+      return "movd\t{%1, %0|%0, %1}";
+
+    case TYPE_LEA:
+      return "lea{l}\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+      return "mov{l}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "mmx")
+	    (eq_attr "alternative" "3,4,5")
+	      (const_string "mmxmov")
+	    (eq_attr "alternative" "6")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "7,8,9,10,11")
+	      (const_string "ssemov")
+ 	    (match_operand:DI 1 "pic_32bit_operand" "")
+	      (const_string "lea")
+	   ]
+	   (const_string "imov")))
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+       (const_string "orig")
+       (const_string "maybe_vex")))
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "2,3")
+	      (const_string "DI")
+	    (eq_attr "alternative" "6,7")
+	      (if_then_else
+	        (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+	        (const_string "V4SF")
+	        (const_string "TI"))
+	    (and (eq_attr "alternative" "8,9,10,11")
+	         (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+	      (const_string "SF")
+	   ]
+	   (const_string "SI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabssi_1_rex64"
+  [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:SI 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{l}\t{%1, %P0|%P0, %1}
+   mov{l}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movabssi_2_rex64"
+  [(set (match_operand:SI 0 "register_operand" "=a,r")
+        (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{l}\t{%P1, %0|%0, %P1}
+   mov{l}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "SI")])
+
+(define_insn "*swapsi"
+  [(set (match_operand:SI 0 "register_operand" "+r")
+	(match_operand:SI 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  ""
+  "xchg{l}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+        (match_operand:HI 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (HImode, operands); DONE;")
+
+(define_insn "*pushhi2"
+  [(set (match_operand:HI 0 "push_operand" "=X")
+	(match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
+  "!TARGET_64BIT"
+  "push{l}\t%k1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushhi2_rex64"
+  [(set (match_operand:HI 0 "push_operand" "=X")
+	(match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
+  "TARGET_64BIT"
+  "push{q}\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
+(define_insn "*movhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      /* movzwl is faster than movw on p2 due to partial word stalls,
+	 though not as fast as an aligned movl.  */
+      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+        return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+        return "mov{w}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "0")
+		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			  (const_int 0))
+		      (eq (symbol_ref "TARGET_HIMODE_MATH")
+			  (const_int 0))))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "1,2")
+		 (match_operand:HI 1 "aligned_operand" ""))
+	      (const_string "imov")
+	    (and (ne (symbol_ref "TARGET_MOVX")
+		     (const_int 0))
+		 (eq_attr "alternative" "0,2"))
+	      (const_string "imovx")
+	   ]
+	   (const_string "imov")))
+    (set (attr "mode")
+      (cond [(eq_attr "type" "imovx")
+	       (const_string "SI")
+	     (and (eq_attr "alternative" "1,2")
+		  (match_operand:HI 1 "aligned_operand" ""))
+	       (const_string "SI")
+	     (and (eq_attr "alternative" "0")
+		  (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			   (const_int 0))
+		       (eq (symbol_ref "TARGET_HIMODE_MATH")
+			   (const_int 0))))
+	       (const_string "SI")
+	    ]
+	    (const_string "HI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabshi_1_rex64"
+  [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:HI 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{w}\t{%1, %P0|%P0, %1}
+   mov{w}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+(define_insn "*movabshi_2_rex64"
+  [(set (match_operand:HI 0 "register_operand" "=a,r")
+        (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{w}\t{%P1, %0|%0, %P1}
+   mov{w}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "HI")])
+
+(define_insn "*swaphi_1"
+  [(set (match_operand:HI 0 "register_operand" "+r")
+	(match_operand:HI 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "xchg{l}\t%k1, %k0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])
+
+;; amdfam10_decode is not added since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10.
+(define_insn "*swaphi_2"
+  [(set (match_operand:HI 0 "register_operand" "+r")
+	(match_operand:HI 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_PARTIAL_REG_STALL"
+  "xchg{w}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "HI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstricthi"
+  [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
+  /* Don't generate memory->memory moves; go through a register.  */
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "*movstricthi_1"
+  [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r"))
+	(match_operand:HI 1 "general_operand" "rn,m"))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "mov{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "HI")])
+
+(define_insn "*movstricthi_xor"
+  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+	(match_operand:HI 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{w}\t%0, %0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "HI")
+   (set_attr "length_immediate" "0")])
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (QImode, operands); DONE;")
+
+;; emit_push_insn, when it calls move_by_pieces, requires an insn to
+;; "push a byte".  But actually we use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
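+;;
+;; For example, pushing a QImode value held in %al comes out as
+;;   pushl %eax
+;; which stores four bytes and adjusts %esp by 4.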
+
+(define_insn "*pushqi2"
+  [(set (match_operand:QI 0 "push_operand" "=X")
+	(match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))]
+  "!TARGET_64BIT"
+  "push{l}\t%k1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushqi2_rex64"
+  [(set (match_operand:QI 0 "push_operand" "=X")
+	(match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))]
+  "TARGET_64BIT"
+  "push{q}\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
+;; The situation is quite tricky as to when to choose a full sized
+;; (SImode) move over a QImode move.  For a Q_REG -> Q_REG move we use
+;; the full size only on partial register dependency machines (such as
+;; AMD Athlon), where a QImode move would introduce an extra dependency,
+;; and on partial register stall machines that don't use QImode patterns
+;; (where a QImode move would stall the next instruction).
+;;
+;; For loads of a Q_REG into a NONQ_REG we use full sized moves, except
+;; on partial register stall machines: there reading the whole register
+;; could stall if it was only partially written, so we use movzx instead.
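+;;
+;; For example, a Q_REG -> Q_REG copy may therefore come out as either
+;;   movb %al, %cl
+;; or, on the machines described above, as the full-width
+;;   movl %eax, %ecx
+;; while a load into a NONQ_REG on a stall machine becomes
+;;   movzbl %al, %edi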
+(define_insn "*movqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+	(match_operand:QI 1 "general_operand"      " q,qn,qm,q,rn,qm,qn"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
+      return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+        return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+        return "mov{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (eq_attr "alternative" "5")
+		 (not (match_operand:QI 1 "aligned_operand" "")))
+	      (const_string "imovx")
+	    (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+	      (const_string "imov")
+	    (and (eq_attr "alternative" "3")
+		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+			  (const_int 0))
+		      (eq (symbol_ref "TARGET_QIMODE_MATH")
+			  (const_int 0))))
+	      (const_string "imov")
+	    (eq_attr "alternative" "3,5")
+	      (const_string "imovx")
+	    (and (ne (symbol_ref "TARGET_MOVX")
+		     (const_int 0))
+		 (eq_attr "alternative" "2"))
+	      (const_string "imovx")
+	   ]
+	   (const_string "imov")))
+   (set (attr "mode")
+      (cond [(eq_attr "alternative" "3,4,5")
+	       (const_string "SI")
+	     (eq_attr "alternative" "6")
+	       (const_string "QI")
+	     (eq_attr "type" "imovx")
+	       (const_string "SI")
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "0,1")
+		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+				(const_int 0))
+			    (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+				     (const_int 0))
+			    	 (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+				     (const_int 0))))))
+	       (const_string "SI")
+	     ;; Avoid partial register stalls when not using QImode arithmetic
+	     (and (eq_attr "type" "imov")
+		  (and (eq_attr "alternative" "0,1")
+		       (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+				(const_int 0))
+			    (eq (symbol_ref "TARGET_QIMODE_MATH")
+				(const_int 0)))))
+	       (const_string "SI")
+	   ]
+	   (const_string "QI")))])
+
+(define_insn "*swapqi_1"
+  [(set (match_operand:QI 0 "register_operand" "+r")
+	(match_operand:QI 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "xchg{l}\t%k1, %k0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])
+
+;; amdfam10_decode is not added since TARGET_PARTIAL_REG_STALL is disabled
+;; for AMDFAM10.
+(define_insn "*swapqi_2"
+  [(set (match_operand:QI 0 "register_operand" "+q")
+	(match_operand:QI 1 "register_operand" "+q"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_PARTIAL_REG_STALL"
+  "xchg{b}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrictqi"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
+  /* Don't generate memory->memory moves, go through a register.  */
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*movstrictqi_1"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+	(match_operand:QI 1 "general_operand" "*qn,m"))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "mov{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movstrictqi_xor"
+  [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
+	(match_operand:QI 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  "xor{b}\t%0, %0"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_extv_1"
+  [(set (match_operand:SI 0 "register_operand" "=R")
+	(sign_extract:SI (match_operand 1 "ext_register_operand" "Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  ""
+  "movs{bl|x}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movhi_extv_1"
+  [(set (match_operand:HI 0 "register_operand" "=R")
+	(sign_extract:HI (match_operand 1 "ext_register_operand" "Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  ""
+  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extv_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
+        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+                         (const_int 8)
+                         (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extv_1_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+        (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+                         (const_int 8)
+                         (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+;; Stores and loads of ax to arbitrary constant addresses.
+;; We fake a second form of instruction to force reload to load the address
+;; into a register when rax is not available.
+(define_insn "*movabsqi_1_rex64"
+  [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:QI 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{b}\t{%1, %P0|%P0, %1}
+   mov{b}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movabsqi_2_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=a,r")
+        (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{b}\t{%P1, %0|%0, %P1}
+   mov{b}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movdi_extzv_1"
+  [(set (match_operand:DI 0 "register_operand" "=R")
+	(zero_extract:DI (match_operand 1 "ext_register_operand" "Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "TARGET_64BIT"
+  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+(define_insn "*movsi_extzv_1"
+  [(set (match_operand:SI 0 "register_operand" "=R")
+	(zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  ""
+  "movz{bl|x}\t{%h1, %0|%0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extzv_2"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+        (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+				    (const_int 8)
+				    (const_int 8)) 0))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_2_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+        (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+				    (const_int 8)
+				    (const_int 8)) 0))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "movsi_insv_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(match_operand:SI 1 "general_operand" "Qmn"))]
+  "!TARGET_64BIT"
+  "mov{b}\t{%b1, %h0|%h0, %b1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movsi_insv_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(match_operand:SI 1 "nonmemory_operand" "Qn"))]
+  "TARGET_64BIT"
+  "mov{b}\t{%b1, %h0|%h0, %b1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "movdi_insv_1_rex64"
+  [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(match_operand:DI 1 "nonmemory_operand" "Qn"))]
+  "TARGET_64BIT"
+  "mov{b}\t{%b1, %h0|%h0, %b1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_insn "*movqi_insv_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+			 (const_int 8)
+			 (const_int 8))
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
+		     (const_int 8)))]
+  ""
+  "mov{b}\t{%h1, %h0|%h0, %h1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "QI")])
+
+(define_expand "movdi"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(match_operand:DI 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (DImode, operands); DONE;")
+
+(define_insn "*pushdi"
+  [(set (match_operand:DI 0 "push_operand" "=<")
+	(match_operand:DI 1 "general_no_elim_operand" "riF*m"))]
+  "!TARGET_64BIT"
+  "#")
+
+(define_insn "*pushdi2_rex64"
+  [(set (match_operand:DI 0 "push_operand" "=<,!<")
+	(match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+  "TARGET_64BIT"
+  "@
+   push{q}\t%1
+   #"
+  [(set_attr "type" "push,multi")
+   (set_attr "mode" "DI")])
+
+;; Convert impossible pushes of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  In case this
+;; fails, push the sign extended lower part first and then overwrite the
+;; upper part with a 32-bit move.
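+;;
+;; For example (constant and scratch register purely illustrative), there
+;; is no "pushq $0x123456789abcdef0"; with a scratch register this becomes
+;;   movabsq $0x123456789abcdef0, %rdx
+;;   pushq   %rdx
+;; and otherwise the low half is pushed as a sign extended imm32 and the
+;; high half is then stored to 4(%rsp) with a 32-bit move.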
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (set (match_operand:DI 0 "push_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; We need to define this as both a peephole and a splitter in case the
+;; peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+  [(set (match_operand:DI 0 "push_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode) && 1"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "split_di (&operands[1], 1, &operands[2], &operands[3]);
+   operands[1] = gen_lowpart (DImode, operands[2]);
+   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+						    GEN_INT (4)));
+  ")
+
+(define_split
+  [(set (match_operand:DI 0 "push_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? epilogue_completed : reload_completed)
+   && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "split_di (&operands[1], 1, &operands[2], &operands[3]);
+   operands[1] = gen_lowpart (DImode, operands[2]);
+   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+						    GEN_INT (4)));
+  ")
+
+(define_insn "*pushdi2_prologue_rex64"
+  [(set (match_operand:DI 0 "push_operand" "=<")
+	(match_operand:DI 1 "general_no_elim_operand" "re*m"))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+  "push{q}\t%1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
+(define_insn "*popdi1_epilogue_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+	(mem:DI (reg:DI SP_REG)))
+   (set (reg:DI SP_REG)
+	(plus:DI (reg:DI SP_REG) (const_int 8)))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+  "pop{q}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "DI")])
+
+(define_insn "popdi1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+	(mem:DI (reg:DI SP_REG)))
+   (set (reg:DI SP_REG)
+	(plus:DI (reg:DI SP_REG) (const_int 8)))]
+  "TARGET_64BIT"
+  "pop{q}\t%0"
+  [(set_attr "type" "pop")
+   (set_attr "mode" "DI")])
+
+(define_insn "*movdi_xor_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operand:DI 1 "const0_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && reload_completed"
+  "xor{l}\t%k0, %k0";
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*movdi_or_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operand:DI 1 "const_int_operand" "i"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && reload_completed
+   && operands[1] == constm1_rtx"
+{
+  operands[1] = constm1_rtx;
+  return "or{q}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")])
+
+(define_insn "*movdi_2"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+			"=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
+	(match_operand:DI 1 "general_operand"
+			"riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  ,C ,*x,*x,m "))]
+  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   #
+   #
+   pxor\t%0, %0
+   movq\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
+   xorps\t%0, %0
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+
+(define_split
+  [(set (match_operand:DI 0 "push_operand" "")
+        (match_operand:DI 1 "general_operand" ""))]
+  "!TARGET_64BIT && reload_completed
+   && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+;; %%% This multiword shite has got to go.
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+        (match_operand:DI 1 "general_operand" ""))]
+  "!TARGET_64BIT && reload_completed
+   && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
+   && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movdi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+	  "=r,r  ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
+	(match_operand:DI 1 "general_operand"
+	  "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r   ,m  ,C ,*x,*Yi,*x,r  ,m ,*Ym,*x"))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_SSECVT:
+      if (SSE_REG_P (operands[0]))
+	return "movq2dq\t{%1, %0|%0, %1}";
+      else
+	return "movdq2q\t{%1, %0|%0, %1}";
+
+    case TYPE_SSEMOV:
+      if (TARGET_AVX)
+	{
+	  if (get_attr_mode (insn) == MODE_TI)
+	    return "vmovdqa\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovq\t{%1, %0|%0, %1}";
+	}
+
+      if (get_attr_mode (insn) == MODE_TI)
+	return "movdqa\t{%1, %0|%0, %1}";
+      /* FALLTHRU */
+
+    case TYPE_MMXMOV:
+      /* Moves from and to integer registers are done using the movd
+	 opcode with a REX prefix.  */
+      if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+	return "movd\t{%1, %0|%0, %1}";
+      return "movq\t{%1, %0|%0, %1}";
+
+    case TYPE_SSELOG1:
+      return "%vpxor\t%0, %d0";
+
+    case TYPE_MMX:
+      return "pxor\t%0, %0";
+
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_LEA:
+      return "lea{q}\t{%a1, %0|%0, %a1}";
+
+    default:
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+      if (get_attr_mode (insn) == MODE_SI)
+	return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else if (which_alternative == 2)
+	return "movabs{q}\t{%1, %0|%0, %1}";
+      else
+	return "mov{q}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "5")
+	      (const_string "mmx")
+	    (eq_attr "alternative" "6,7,8,9,10")
+	      (const_string "mmxmov")
+	    (eq_attr "alternative" "11")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "12,13,14,15,16")
+	      (const_string "ssemov")
+	    (eq_attr "alternative" "17,18")
+	      (const_string "ssecvt")
+	    (eq_attr "alternative" "4")
+	      (const_string "multi")
+ 	    (match_operand:DI 1 "pic_32bit_operand" "")
+	      (const_string "lea")
+	   ]
+	   (const_string "imov")))
+   (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+       (const_string "maybe_vex")
+       (const_string "orig")))
+   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
+
+;; Stores and loads of ax to arbitrary constant addresses.
+;; We fake a second form of instruction to force reload to load the address
+;; into a register when rax is not available.
+(define_insn "*movabsdi_1_rex64"
+  [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:DI 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{q}\t{%1, %P0|%P0, %1}
+   mov{q}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+(define_insn "*movabsdi_2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a,r")
+        (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{q}\t{%P1, %0|%0, %P1}
+   mov{q}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "DI")])
+
+;; Convert impossible stores of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  In case this
+;; fails, move by 32-bit parts.
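+;;
+;; For example (values purely illustrative), with a scratch register this
+;; becomes
+;;   movabsq $0x123456789abcdef0, %rdx
+;;   movq    %rdx, (%rdi)
+;; and otherwise two 32-bit immediate stores, to (%rdi) and 4(%rdi).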
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (set (match_operand:DI 0 "memory_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; We need to define this as both a peephole and a splitter in case the
+;; peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode) && 1"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? epilogue_completed : reload_completed)
+   && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+
+(define_insn "*swapdi_rex64"
+  [(set (match_operand:DI 0 "register_operand" "+r")
+	(match_operand:DI 1 "register_operand" "+r"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_64BIT"
+  "xchg{q}\t%1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "DI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])
+
+(define_expand "movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "")
+	(match_operand:OI 1 "general_operand" ""))]
+  "TARGET_AVX"
+  "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "vxorps\t%0, %0, %0";
+    case 1:
+    case 2:
+      if (misaligned_operand (operands[0], OImode)
+	  || misaligned_operand (operands[1], OImode))
+	return "vmovdqu\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
+(define_expand "movti"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(match_operand:TI 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE || TARGET_64BIT"
+{
+  if (TARGET_64BIT)
+    ix86_expand_move (TImode, operands);
+  else if (push_operand (operands[0], TImode))
+    ix86_expand_push (TImode, operands[1]);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
+})
+
+(define_insn "*movti_internal"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !TARGET_64BIT
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 1:
+    case 2:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to the stack may result in unaligned memory accesses.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovups\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqa\t{%1, %0|%0, %1}";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		    (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)))
+		 (const_string "V4SF")
+	       (and (eq_attr "alternative" "2")
+		    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			(const_int 0)))
+		 (const_string "V4SF")]
+	      (const_string "TI")))])
+
+(define_insn "*movti_rex64"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
+	(match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 3:
+    case 4:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to the stack may result in unaligned memory accesses.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovups\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else
+	    return "%vmovdqa\t{%1, %0|%0, %1}";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+   (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "2,3")
+		 (if_then_else
+		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		       (const_int 0))
+		   (const_string "V4SF")
+		   (const_string "TI"))
+	       (eq_attr "alternative" "4")
+		 (if_then_else
+		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			    (const_int 0))
+			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "V4SF")
+		   (const_string "TI"))]
+	       (const_string "DI")))])
+
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+        (match_operand:TI 1 "general_operand" ""))]
+  "reload_completed && !SSE_REG_P (operands[0])
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern.  Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, but it doesn't seem harmful to
+;; have around all the time.
+(define_expand "movcdi"
+  [(set (match_operand:CDI 0 "nonimmediate_operand" "")
+	(match_operand:CDI 1 "general_operand" ""))]
+  ""
+{
+  if (push_operand (operands[0], CDImode))
+    emit_move_complex_push (CDImode, operands[0], operands[1]);
+  else
+    emit_move_complex_parts (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "movsf"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "")
+	(match_operand:SF 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (SFmode, operands); DONE;")
+
+(define_insn "*pushsf"
+  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+	(match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))]
+  "!TARGET_64BIT"
+{
+  /* Anything else should already be split before reg-stack.  */
+  gcc_assert (which_alternative == 1);
+  return "push{l}\t%1";
+}
+  [(set_attr "type" "multi,push,multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "SF,SI,SF")])
+
+(define_insn "*pushsf_rex64"
+  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
+  "TARGET_64BIT"
+{
+  /* Anything else should already be split before reg-stack.  */
+  gcc_assert (which_alternative == 1);
+  return "push{q}\t%q1";
+}
+  [(set_attr "type" "multi,push,multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "SF,DI,SF")])
+
+(define_split
+  [(set (match_operand:SF 0 "push_operand" "")
+	(match_operand:SF 1 "memory_operand" ""))]
+  "reload_completed
+   && MEM_P (operands[1])
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0)
+	(match_dup 2))])
+
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+  [(set (match_operand:SF 0 "push_operand" "")
+	(match_operand:SF 1 "any_fp_register_operand" ""))]
+  "!TARGET_64BIT"
+  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+   (set (mem:SF (reg:SI SP_REG)) (match_dup 1))])
+
+(define_split
+  [(set (match_operand:SF 0 "push_operand" "")
+	(match_operand:SF 1 "any_fp_register_operand" ""))]
+  "TARGET_64BIT"
+  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+   (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
+
+(define_insn "*movsf_1"
+  [(set (match_operand:SF 0 "nonimmediate_operand"
+	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
+	(match_operand:SF 1 "general_operand"
+	  "fm,f,G,rmF,Fr,C,x,xm,x,m  ,*y,*y ,r  ,Yi,r   ,*Ym"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]))
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], SFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3:
+    case 4:
+      return "mov{l}\t{%1, %0|%0, %1}";
+    case 5:
+      if (get_attr_mode (insn) == MODE_TI)
+	return "%vpxor\t%0, %d0";
+      else
+	return "%vxorps\t%0, %d0";
+    case 6:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vmovaps\t{%1, %0|%0, %1}";
+      else
+	return "%vmovss\t{%1, %d0|%d0, %1}";
+    case 7:
+      if (TARGET_AVX)
+	return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+				   : "vmovss\t{%1, %0|%0, %1}";
+      else
+	return "movss\t{%1, %0|%0, %1}";
+    case 8:
+      return "%vmovss\t{%1, %0|%0, %1}";
+
+    case 9: case 10: case 14: case 15:
+      return "movd\t{%1, %0|%0, %1}";
+    case 12: case 13:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
+    case 11:
+      return "movq\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+       (const_string "maybe_vex")
+       (const_string "orig")))
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "3,4,9,10")
+		 (const_string "SI")
+	       (eq_attr "alternative" "5")
+		 (if_then_else
+		   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    	 (const_int 0))
+			     (ne (symbol_ref "TARGET_SSE2")
+				 (const_int 0)))
+			(eq (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "TI")
+		   (const_string "V4SF"))
+	       /* For architectures resolving dependencies on
+		  whole SSE registers, use a movaps to break dependency
+		  chains; otherwise use a short move to avoid extra work.
+
+		  Do the same for architectures resolving dependencies on
+		  the parts.  While in DF mode it is better to always handle
+		  just register parts, SF mode is different due to the lack
+		  of instructions to load just part of the register.  It is
+		  better to maintain whole registers in a single format
+		  to avoid problems when using packed logical operations.  */
+	       (eq_attr "alternative" "6")
+		 (if_then_else
+		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+			    (const_int 0))
+			(ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+			    (const_int 0)))
+		   (const_string "V4SF")
+		   (const_string "SF"))
+	       (eq_attr "alternative" "11")
+		 (const_string "DI")]
+	       (const_string "SF")))])
+
+(define_insn "*swapsf"
+  [(set (match_operand:SF 0 "fp_register_operand" "+f")
+	(match_operand:SF 1 "fp_register_operand" "+f"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "reload_completed || TARGET_80387"
+{
+  if (STACK_TOP_P (operands[0]))
+    return "fxch\t%1";
+  else
+    return "fxch\t%0";
+}
+  [(set_attr "type" "fxch")
+   (set_attr "mode" "SF")])
+
+(define_expand "movdf"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(match_operand:DF 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (DFmode, operands); DONE;")
+
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2+2*memory operand size.
+;; On average, pushdf using integer moves can still be shorter.  Allow this
+;; pattern for optimize_size too.
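+;; (For instance, with a one-byte displacement the memory operand costs
+;; 2 bytes, giving 3 + 2 + 2 = 7 bytes for the x87 sequence against
+;; 2 + 2*2 = 6 bytes for two integer pushes.)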
+
+(define_insn "*pushdf_nointeger"
+  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+	(match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
+  "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
+{
+  /* This insn should already be split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*,*")
+   (set_attr "mode" "DF,SI,SI,DF")])
+
+(define_insn "*pushdf_integer"
+  [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+	(match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
+  "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+{
+  /* This insn should already be split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "DF,SI,DF")])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(match_operand:DF 1 "any_fp_register_operand" ""))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (match_dup 1))]
+  "")
+
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(match_operand:DF 1 "general_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+;; Moving is usually shorter when only FP registers are used. This separate
+;; movdf pattern avoids the use of integer registers for FP operations
+;; when optimizing for size.
+
+(define_insn "*movdf_nointeger"
+  [(set (match_operand:DF 0 "nonimmediate_operand"
+			"=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
+	(match_operand:DF 1 "general_operand"
+			"fm,f,G,*roF,*Fr,C   ,Y2*x,mY2*x,Y2*x"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && ((optimize_function_for_size_p (cfun)
+       || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
+   && (reload_in_progress || reload_completed
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
+           && !memory_operand (operands[0], DFmode)
+	   && standard_80387_constant_p (operands[1]))
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || ((optimize_function_for_size_p (cfun)
+            || !TARGET_MEMORY_MISMATCH_STALL
+	    || reload_in_progress || reload_completed)
+ 	   && memory_operand (operands[0], DFmode)))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3:
+    case 4:
+      return "#";
+    case 5:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vxorps\t%0, %d0";
+	case MODE_V2DF:
+	  return "%vxorpd\t%0, %d0";
+	case MODE_TI:
+	  return "%vpxor\t%0, %d0";
+	default:
+	  gcc_unreachable ();
+	}
+    case 6:
+    case 7:
+    case 8:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vmovaps\t{%1, %0|%0, %1}";
+	case MODE_V2DF:
+	  return "%vmovapd\t{%1, %0|%0, %1}";
+	case MODE_TI:
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
+	case MODE_DI:
+	  return "%vmovq\t{%1, %0|%0, %1}";
+	case MODE_DF:
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
+	case MODE_V1DF:
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlpd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlpd\t{%1, %0|%0, %1}";
+	case MODE_V2SF:
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlps\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlps\t{%1, %0|%0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,1,2")
+		 (const_string "DF")
+	       (eq_attr "alternative" "3,4")
+		 (const_string "SI")
+
+	       /* For SSE1, we have many fewer alternatives.  */
+	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		 (cond [(eq_attr "alternative" "5,6")
+			  (const_string "V4SF")
+		       ]
+		   (const_string "V2SF"))
+
+	       /* xorps is one byte shorter.  */
+	       (eq_attr "alternative" "5")
+		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0))
+			  (const_string "V4SF")
+			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    (const_int 0))
+			  (const_string "TI")
+		       ]
+		       (const_string "V2DF"))
+
+	       /* For architectures resolving dependencies on
+		  whole SSE registers, use a movapd to break dependency
+		  chains; otherwise use a short move to avoid extra work.
+
+		  movaps encodes one byte shorter.  */
+	       (eq_attr "alternative" "6")
+		 (cond
+		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		        (const_int 0))
+		      (const_string "V4SF")
+		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		        (const_int 0))
+		      (const_string "V2DF")
+		   ]
+		   (const_string "DF"))
+	       /* For architectures resolving dependencies on register
+		  parts, we may avoid the extra work needed to zero out
+		  the upper part of the register.  */
+	       (eq_attr "alternative" "7")
+		 (if_then_else
+		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+		       (const_int 0))
+		   (const_string "V1DF")
+		   (const_string "DF"))
+	      ]
+	      (const_string "DF")))])
+
+(define_insn "*movdf_integer_rex64"
+  [(set (match_operand:DF 0 "nonimmediate_operand"
+		"=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+	(match_operand:DF 1 "general_operand"
+		"fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]))
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], DFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3:
+    case 4:
+      return "#";
+
+    case 5:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vxorps\t%0, %d0";
+	case MODE_V2DF:
+	  return "%vxorpd\t%0, %d0";
+	case MODE_TI:
+	  return "%vpxor\t%0, %d0";
+	default:
+	  gcc_unreachable ();
+	}
+    case 6:
+    case 7:
+    case 8:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "%vmovaps\t{%1, %0|%0, %1}";
+	case MODE_V2DF:
+	  return "%vmovapd\t{%1, %0|%0, %1}";
+	case MODE_TI:
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
+	case MODE_DI:
+	  return "%vmovq\t{%1, %0|%0, %1}";
+	case MODE_DF:
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
+	case MODE_V1DF:
+	  return "%vmovlpd\t{%1, %d0|%d0, %1}";
+	case MODE_V2SF:
+	  return "%vmovlps\t{%1, %d0|%d0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case 9:
+    case 10:
+    return "%vmovd\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,1,2")
+		 (const_string "DF")
+	       (eq_attr "alternative" "3,4,9,10")
+		 (const_string "DI")
+
+	       /* For SSE1, we have many fewer alternatives.  */
+	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		 (cond [(eq_attr "alternative" "5,6")
+			  (const_string "V4SF")
+		       ]
+		   (const_string "V2SF"))
+
+	       /* xorps is one byte shorter.  */
+	       (eq_attr "alternative" "5")
+		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0))
+			  (const_string "V4SF")
+			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    (const_int 0))
+			  (const_string "TI")
+		       ]
+		       (const_string "V2DF"))
+
+	       /* For architectures resolving dependencies on
+		  whole SSE registers, use a movapd to break dependency
+		  chains; otherwise use a short move to avoid extra work.
+
+		  movaps encodes one byte shorter.  */
+	       (eq_attr "alternative" "6")
+		 (cond
+		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		        (const_int 0))
+		      (const_string "V4SF")
+		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		        (const_int 0))
+		      (const_string "V2DF")
+		   ]
+		   (const_string "DF"))
+	       /* For architectures resolving dependencies on register
+		  parts, we may avoid the extra work needed to zero out
+		  the upper part of the register.  */
+	       (eq_attr "alternative" "7")
+		 (if_then_else
+		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+		       (const_int 0))
+		   (const_string "V1DF")
+		   (const_string "DF"))
+	      ]
+	      (const_string "DF")))])
+
+(define_insn "*movdf_integer"
+  [(set (match_operand:DF 0 "nonimmediate_operand"
+		"=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
+	(match_operand:DF 1 "general_operand"
+		"fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && optimize_function_for_speed_p (cfun)
+   && TARGET_INTEGER_DFMODE_MOVES
+   && (reload_in_progress || reload_completed
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]))
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], DFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3:
+    case 4:
+      return "#";
+
+    case 5:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "xorps\t%0, %0";
+	case MODE_V2DF:
+	  return "xorpd\t%0, %0";
+	case MODE_TI:
+	  return "pxor\t%0, %0";
+	default:
+	  gcc_unreachable ();
+	}
+    case 6:
+    case 7:
+    case 8:
+      switch (get_attr_mode (insn))
+	{
+	case MODE_V4SF:
+	  return "movaps\t{%1, %0|%0, %1}";
+	case MODE_V2DF:
+	  return "movapd\t{%1, %0|%0, %1}";
+	case MODE_TI:
+	  return "movdqa\t{%1, %0|%0, %1}";
+	case MODE_DI:
+	  return "movq\t{%1, %0|%0, %1}";
+	case MODE_DF:
+	  return "movsd\t{%1, %0|%0, %1}";
+	case MODE_V1DF:
+	  return "movlpd\t{%1, %0|%0, %1}";
+	case MODE_V2SF:
+	  return "movlps\t{%1, %0|%0, %1}";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,1,2")
+		 (const_string "DF")
+	       (eq_attr "alternative" "3,4")
+		 (const_string "SI")
+
+	       /* For SSE1, we have many fewer alternatives.  */
+	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+		 (cond [(eq_attr "alternative" "5,6")
+			  (const_string "V4SF")
+		       ]
+		   (const_string "V2SF"))
+
+	       /* xorps is one byte shorter.  */
+	       (eq_attr "alternative" "5")
+		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0))
+			  (const_string "V4SF")
+			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+			    (const_int 0))
+			  (const_string "TI")
+		       ]
+		       (const_string "V2DF"))
+
+	       /* For architectures resolving dependencies on
+		  whole SSE registers, use a movapd to break dependency
+		  chains; otherwise use a short move to avoid extra work.
+
+		  movaps encodes one byte shorter.  */
+	       (eq_attr "alternative" "6")
+		 (cond
+		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		        (const_int 0))
+		      (const_string "V4SF")
+		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		        (const_int 0))
+		      (const_string "V2DF")
+		   ]
+		   (const_string "DF"))
+	       /* For architectures resolving dependencies on register
+		  parts, we may avoid the extra work needed to zero out
+		  the upper part of the register.  */
+	       (eq_attr "alternative" "7")
+		 (if_then_else
+		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+		       (const_int 0))
+		   (const_string "V1DF")
+		   (const_string "DF"))
+	      ]
+	      (const_string "DF")))])
+
+(define_split
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(match_operand:DF 1 "general_operand" ""))]
+  "reload_completed
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && ! (ANY_FP_REG_P (operands[0]) ||
+	 (GET_CODE (operands[0]) == SUBREG
+	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+   && ! (ANY_FP_REG_P (operands[1]) ||
+	 (GET_CODE (operands[1]) == SUBREG
+	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*swapdf"
+  [(set (match_operand:DF 0 "fp_register_operand" "+f")
+	(match_operand:DF 1 "fp_register_operand" "+f"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "reload_completed || TARGET_80387"
+{
+  if (STACK_TOP_P (operands[0]))
+    return "fxch\t%1";
+  else
+    return "fxch\t%0";
+}
+  [(set_attr "type" "fxch")
+   (set_attr "mode" "DF")])
+
+(define_expand "movxf"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "")
+	(match_operand:XF 1 "general_operand" ""))]
+  ""
+  "ix86_expand_move (XFmode, operands); DONE;")
+
+;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushxf using integer instructions is 3+3*memory operand size.
+;; Pushing using integer instructions is longer except for constants
+;; and direct memory references.
+;; (This assumes that any given constant is pushed only once, but that
+;;  ought to be handled elsewhere.)
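+;; (For instance, with the same 2-byte memory operand as in the pushdf
+;; comment above, the x87 sequence costs 3 + 2 + 2 = 7 bytes while three
+;; integer pushes cost 3 + 3*2 = 9 bytes.)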
+
+(define_insn "*pushxf_nointeger"
+  [(set (match_operand:XF 0 "push_operand" "=X,X,X")
+	(match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+  "optimize_function_for_size_p (cfun)"
+{
+  /* This insn should already be split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "mode" "XF,SI,SI")])
+
+(define_insn "*pushxf_integer"
+  [(set (match_operand:XF 0 "push_operand" "=<,<")
+	(match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
+  "optimize_function_for_speed_p (cfun)"
+{
+  /* This insn should already be split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*")
+   (set_attr "mode" "XF,SI")])
+
+(define_split
+  [(set (match_operand 0 "push_operand" "")
+	(match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !ANY_FP_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(match_operand:XF 1 "any_fp_register_operand" ""))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
+  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+;; Do not use integer registers when optimizing for size.
+(define_insn "*movxf_nointeger"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
+	(match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
+  "optimize_function_for_size_p (cfun)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || standard_80387_constant_p (operands[1])
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], XFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3: case 4:
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+   (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_insn "*movxf_integer"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+	(match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
+  "optimize_function_for_speed_p (cfun)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (reload_in_progress || reload_completed
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || memory_operand (operands[0], XFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return standard_80387_constant_opcode (operands[1]);
+
+    case 3: case 4:
+      return "#";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+   (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_expand "movtf"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+	(match_operand:TF 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE2"
+{
+  ix86_expand_move (TFmode, operands);
+  DONE;
+})
+
+(define_insn "*movtf_internal"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
+	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  "TARGET_SSE2
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vmovaps\t{%1, %0|%0, %1}";
+      else
+	return "%vmovdqa\t{%1, %0|%0, %1}";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vxorps\t%0, %d0";
+      else
+	return "%vpxor\t%0, %d0";
+    case 3:
+    case 4:
+	return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,2")
+		 (if_then_else
+		   (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+		       (const_int 0))
+		   (const_string "V4SF")
+		   (const_string "TI"))
+	       (eq_attr "alternative" "1")
+		 (if_then_else
+		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+			    (const_int 0))
+			(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+			    (const_int 0)))
+		   (const_string "V4SF")
+		   (const_string "TI"))]
+	       (const_string "DI")))])
+
+(define_insn "*pushtf_sse"
+  [(set (match_operand:TF 0 "push_operand" "=<,<,<")
+	(match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
+  "TARGET_SSE2"
+{
+  /* This insn should already be split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "sse,*,*")
+   (set_attr "mode" "TF,SI,SI")])
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+	(match_operand:TF 1 "general_operand" ""))]
+  "TARGET_SSE2 && reload_completed
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+	(match_operand:TF 1 "any_fp_register_operand" ""))]
+  "TARGET_SSE2"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
+   (set (mem:TF (reg:P SP_REG)) (match_dup 1))]
+  "")
+
+(define_split
+  [(set (match_operand 0 "nonimmediate_operand" "")
+	(match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && GET_MODE (operands[0]) == XFmode
+   && ! (ANY_FP_REG_P (operands[0]) ||
+	 (GET_CODE (operands[0]) == SUBREG
+	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+   && ! (ANY_FP_REG_P (operands[1]) ||
+	 (GET_CODE (operands[1]) == SUBREG
+	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(match_operand 1 "memory_operand" ""))]
+  "reload_completed
+   && MEM_P (operands[1])
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == SFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  rtx c = operands[2];
+  rtx r = operands[0];
+
+  if (GET_CODE (r) == SUBREG)
+    r = SUBREG_REG (r);
+
+  if (SSE_REG_P (r))
+    {
+      if (!standard_sse_constant_p (c))
+	FAIL;
+    }
+  else if (FP_REG_P (r))
+    {
+      if (!standard_80387_constant_p (c))
+	FAIL;
+    }
+  else if (MMX_REG_P (r))
+    FAIL;
+})
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(float_extend (match_operand 1 "memory_operand" "")))]
+  "reload_completed
+   && MEM_P (operands[1])
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == SFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  rtx c = operands[2];
+  rtx r = operands[0];
+
+  if (GET_CODE (r) == SUBREG)
+    r = SUBREG_REG (r);
+
+  if (SSE_REG_P (r))
+    {
+      if (!standard_sse_constant_p (c))
+	FAIL;
+    }
+  else if (FP_REG_P (r))
+    {
+      if (!standard_80387_constant_p (c))
+	FAIL;
+    }
+  else if (MMX_REG_P (r))
+    FAIL;
+})
+
+(define_insn "swapxf"
+  [(set (match_operand:XF 0 "register_operand" "+f")
+	(match_operand:XF 1 "register_operand" "+f"))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_80387"
+{
+  if (STACK_TOP_P (operands[0]))
+    return "fxch\t%1";
+  else
+    return "fxch\t%0";
+}
+  [(set_attr "type" "fxch")
+   (set_attr "mode" "XF")])
+
+;; Split the load of -0.0 or -1.0 into an fldz;fchs or fld1;fchs sequence.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(match_operand:X87MODEF 1 "immediate_operand" ""))]
+  "reload_completed && FP_REGNO_P (REGNO (operands[0]))
+   && (standard_80387_constant_p (operands[1]) == 8
+       || standard_80387_constant_p (operands[1]) == 9)"
+  [(set (match_dup 0)(match_dup 1))
+   (set (match_dup 0)
+	(neg:X87MODEF (match_dup 0)))]
+{
+  REAL_VALUE_TYPE r;
+
+  REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+  if (real_isnegzero (&r))
+    operands[1] = CONST0_RTX (<MODE>mode);
+  else
+    operands[1] = CONST1_RTX (<MODE>mode);
+})
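+
+;; For example, after this split a load of -1.0 is emitted as a constant
+;; load followed by a negation:
+;;	fld1
+;;	fchs
+;; and a load of -0.0 uses fldz;fchs instead.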
+
+(define_split
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+        (match_operand:TF 1 "general_operand" ""))]
+  "reload_completed
+   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+;; Zero extension instructions
+
+(define_expand "zero_extendhisi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  ""
+{
+  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+    {
+      operands[1] = force_reg (HImode, operands[1]);
+      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "zero_extendhisi2_and"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+     (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed && TARGET_ZERO_EXTEND_WITH_AND
+   && optimize_function_for_speed_p (cfun)"
+  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_insn "*zero_extendhisi2_movzwl"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "!TARGET_ZERO_EXTEND_WITH_AND
+   || optimize_function_for_size_p (cfun)"
+  "movz{wl|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+(define_expand "zero_extendqihi2"
+  [(parallel
+    [(set (match_operand:HI 0 "register_operand" "")
+       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+(define_insn "*zero_extendqihi2_and"
+  [(set (match_operand:HI 0 "register_operand" "=r,?&q")
+     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "HI")])
+
+(define_insn "*zero_extendqihi2_movzbw_and"
+  [(set (match_operand:HI 0 "register_operand" "=r,r")
+     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+  "#"
+  [(set_attr "type" "imovx,alu1")
+   (set_attr "mode" "HI")])
+
+;; Zero extend to SImode here to avoid partial register stalls.
+(define_insn "*zero_extendqihi2_movzbl"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && reload_completed"
+  "movz{bl|x}\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+;; For the movzbw case strip only the clobber
+(define_split
+  [(set (match_operand:HI 0 "register_operand" "")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (!TARGET_ZERO_EXTEND_WITH_AND
+       || optimize_function_for_size_p (cfun))
+   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
+
+;; When the source and destination do not overlap, clear the destination
+;; first and then do the movb.
+(define_split
+  [(set (match_operand:HI 0 "register_operand" "")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && ANY_QI_REG_P (operands[0])
+   && (TARGET_ZERO_EXTEND_WITH_AND
+       && optimize_function_for_speed_p (cfun))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(set (match_dup 0) (const_int 0))
+   (set (strict_low_part (match_dup 2)) (match_dup 1))]
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; The rest is handled by a single and.
+(define_split
+  [(set (match_operand:HI 0 "register_operand" "")
+	(zero_extend:HI (match_operand:QI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) == true_regnum (operands[1])"
+  [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_expand "zero_extendqisi2"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+       (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+(define_insn "*zero_extendqisi2_and"
+  [(set (match_operand:SI 0 "register_operand" "=r,?&q")
+     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw_and"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+  "#"
+  [(set_attr "type" "imovx,alu1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+     (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && reload_completed"
+  "movz{bl|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+;; For the movzbl case strip only the clobber
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+  [(set (match_dup 0)
+	(zero_extend:SI (match_dup 1)))])
+
+;; When the source and destination do not overlap, clear the destination
+;; first and then do the movb.
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && ANY_QI_REG_P (operands[0])
+   && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
+   && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(set (match_dup 0) (const_int 0))
+   (set (strict_low_part (match_dup 2)) (match_dup 1))]
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; The rest is handled by a single and.
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:QI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) == true_regnum (operands[1])"
+  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+;; %%% Kill me once multi-word ops are sane.
+(define_expand "zero_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+     (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  ""
+{
+  if (!TARGET_64BIT)
+    {
+      emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "zero_extendsidi2_32"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2")
+	(zero_extend:DI
+	 (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m  ,r   ,m")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "@
+   #
+   #
+   #
+   movd\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")
+   (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
+
+(define_insn "zero_extendsidi2_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
+     (zero_extend:DI
+       (match_operand:SI 1 "nonimmediate_operand"  "rm,0,r   ,m  ,r   ,m")))]
+  "TARGET_64BIT"
+  "@
+   mov\t{%k1, %k0|%k0, %k1}
+   #
+   movd\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+   (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+     (zero_extend:DI (match_dup 0)))]
+  "TARGET_64BIT"
+  [(set (match_dup 4) (const_int 0))]
+  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) == true_regnum (operands[1])"
+  [(set (match_dup 4) (const_int 0))]
+  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed
+   && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])"
+  [(set (match_dup 3) (match_dup 1))
+   (set (match_dup 4) (const_int 0))]
+  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_insn "zero_extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+     (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_64BIT"
+  "movz{wl|x}\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+(define_insn "zero_extendqidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+     (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_64BIT"
+  "movz{bl|x}\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+;; Sign extension instructions
+
+(define_expand "extendsidi2"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (match_scratch:SI 2 ""))])]
+  ""
+{
+  if (TARGET_64BIT)
+    {
+      emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*extendsidi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+  "!TARGET_64BIT"
+  "#")
+
+(define_insn "extendsidi2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=*a,r")
+	(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+  "TARGET_64BIT"
+  "@
+   {cltq|cdqe}
+   movs{lq|x}\t{%1,%0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")
+   (set_attr "prefix_0f" "0")
+   (set_attr "modrm" "0,1")])
+
+(define_insn "extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_64BIT"
+  "movs{wq|x}\t{%1,%0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+(define_insn "extendqidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "TARGET_64BIT"
+  "movs{bq|x}\t{%1,%0|%0, %1}"
+   [(set_attr "type" "imovx")
+    (set_attr "mode" "DI")])
+
+;; Extend-to-memory case, used when the source register dies.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "(reload_completed
+    && dead_or_set_p (insn, operands[1])
+    && !reg_mentioned_p (operands[1], operands[0]))"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 4) (match_dup 1))]
+  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend-to-memory case, used when the source register does not die.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+  emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && true_regnum (operands[1]) == AX_REG
+      && true_regnum (operands[2]) == DX_REG)
+    {
+      emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
+    }
+  else
+    {
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31)));
+    }
+  emit_move_insn (operands[4], operands[2]);
+  DONE;
+})
+
+;; Extend-to-register case.  Optimize the case where the source and
+;; destination registers match, as well as the cases where we can use cltd.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+  if (true_regnum (operands[3]) != true_regnum (operands[1]))
+    emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && true_regnum (operands[3]) == AX_REG)
+    {
+      emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
+      DONE;
+    }
+
+  if (true_regnum (operands[4]) != true_regnum (operands[1]))
+    emit_move_insn (operands[4], operands[1]);
+
+  emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31)));
+  DONE;
+})
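+
+;; When the result's low part is %eax and its high part is %edx, the split
+;; above can use the one-byte cltd, roughly:
+;;	cltd			; sign-extend %eax into %edx:%eax
+;; instead of the generic two-insn sequence:
+;;	movl	%eax, %edx
+;;	sarl	$31, %edx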
+
+(define_insn "extendhisi2"
+  [(set (match_operand:SI 0 "register_operand" "=*a,r")
+	(sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
+  ""
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cwtl|cwde}";
+    default:
+      return "movs{wl|x}\t{%1,%0|%0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+			(eq_attr "alternative" "0"))
+	(const_string "0")
+	(const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+	(const_string "0")
+	(const_string "1")))])
+
+(define_insn "*extendhisi2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=*a,r")
+	(zero_extend:DI
+	  (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cwtl|cwde}";
+    default:
+      return "movs{wl|x}\t{%1,%k0|%k0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+			(eq_attr "alternative" "0"))
+	(const_string "0")
+	(const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+	(const_string "0")
+	(const_string "1")))])
+
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand" "=*a,r")
+	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+  ""
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cbtw|cbw}";
+    default:
+      return "movs{bw|x}\t{%1,%0|%0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "HI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cbtw is vector decoded.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+			(eq_attr "alternative" "0"))
+	(const_string "0")
+	(const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+	(const_string "0")
+	(const_string "1")))])
+
+(define_insn "extendqisi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  ""
+  "movs{bl|x}\t{%1,%0|%0, %1}"
+   [(set_attr "type" "imovx")
+    (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
+  "TARGET_64BIT"
+  "movs{bl|x}\t{%1,%k0|%k0, %1}"
+   [(set_attr "type" "imovx")
+    (set_attr "mode" "SI")])
+
+;; Conversions between float and double.
+
+;; These are all no-ops in the model used for the 80387.  So just
+;; emit moves.
+
+;; %%% Kill these when the call expander knows how to work out a DFmode
+;; push earlier.
+(define_insn "*dummy_extendsfdf2"
+  [(set (match_operand:DF 0 "push_operand" "=<")
+	(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))]
+  "0"
+  "#")
+
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+	(float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_insn "*dummy_extendsfxf2"
+  [(set (match_operand:XF 0 "push_operand" "=<")
+	(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))]
+  "0"
+  "#")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+	(float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_expand "extendsfdf2"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+        (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are LEGITIMATE_CONSTANT_P.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+	  && standard_80387_constant_p (operands[1]) > 0)
+	{
+	  operands[1] = simplify_const_unary_operation
+	    (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+	  emit_move_insn_1 (operands[0], operands[1]);
+	  DONE;
+	}
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+   cvtss2sd:
+      unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+      cvtps2pd xmm2,xmm1
+   We do the conversion post-reload to avoid producing 128-bit spills,
+   which might lead to an ICE on 32-bit targets.  The sequence is
+   unlikely to combine anyway.  */
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (float_extend:DF
+	  (match_operand:SF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+	 (float_extend:V2DF
+	   (vec_select:V2SF
+	     (match_dup 3)
+	     (parallel [(const_int 0) (const_int 1)]))))]
+{
+  operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+  /* Use movss for loading from memory, unpcklps reg, reg for registers.
+     Try to avoid a move when the unpacking can be done in the source.  */
+  if (REG_P (operands[1]))
+    {
+      /* If it is unsafe to overwrite the upper half of the source, we
+	 need to move to the destination and unpack there.  */
+      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+	   || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+	  && true_regnum (operands[0]) != true_regnum (operands[1]))
+	{
+	  rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+	  emit_move_insn (tmp, operands[1]);
+	}
+      else
+	operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+      emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
+    }
+  else
+    emit_insn (gen_vec_setv4sf_0 (operands[3],
+				  CONST0_RTX (V4SFmode), operands[1]));
+})
+
+(define_insn "*extendsfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+        (float_extend:DF
+	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "SF,XF,DF")])
+
+(define_insn "*extendsfdf2_sse"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtss2sd\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DF")])
+
+(define_insn "*extendsfdf2_i387"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF,XF")])
+
+(define_expand "extend<mode>xf2"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "")
+        (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))]
+  "TARGET_80387"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are LEGITIMATE_CONSTANT_P.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if (standard_80387_constant_p (operands[1]) > 0)
+	{
+	  operands[1] = simplify_const_unary_operation
+	    (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
+	  emit_move_insn_1 (operands[0], operands[1]);
+	  DONE;
+	}
+      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+    }
+})
+
+(define_insn "*extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:XF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>,XF")])
+
+;; %%% This seems like bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case.  Otherwise this is just like a simple move
+;; insn.  So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
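+;;
+;; With an illustrative stack slot, the emitted truncation is roughly:
+;;	fstps	-4(%ebp)	; the SFmode store performs the rounding
+;;	flds	-4(%ebp)	; reload when a register result is needed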
+
+;; Conversion from DFmode to SFmode.
+
+(define_expand "truncdfsf2"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "")
+	(float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+    ;
+  else if (flag_unsafe_math_optimizations)
+    ;
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      rtx temp = assign_386_stack_local (SFmode, slot);
+      emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+      DONE;
+    }
+})
+
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+   cvtsd2ss:
+      unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+      cvtpd2ps xmm2,xmm1
+   We do the conversion post-reload to avoid producing 128-bit spills,
+   which might lead to an ICE on 32-bit targets.  The sequence is
+   unlikely to combine anyway.  */
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+	 (vec_concat:V4SF
+	   (float_truncate:V2SF
+	     (match_dup 4))
+	   (match_dup 3)))]
+{
+  operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  operands[3] = CONST0_RTX (V2SFmode);
+  operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+  /* Use movsd for loading from memory, unpcklpd for registers.
+     Try to avoid a move when the unpacking can be done in the source,
+     or when SSE3 movddup is available.  */
+  if (REG_P (operands[1]))
+    {
+      if (!TARGET_SSE3
+	  && true_regnum (operands[0]) != true_regnum (operands[1])
+	  && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+	      || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+	{
+	  rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+	  emit_move_insn (tmp, operands[1]);
+	  operands[1] = tmp;
+	}
+      else if (!TARGET_SSE3)
+	operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+    }
+  else
+    emit_insn (gen_sse2_loadlpd (operands[4],
+				 CONST0_RTX (V2DFmode), operands[1]));
+})
+
+(define_expand "truncdfsf2_with_temp"
+  [(parallel [(set (match_operand:SF 0 "" "")
+		   (float_truncate:SF (match_operand:DF 1 "" "")))
+	      (clobber (match_operand:SF 2 "" ""))])]
+  "")
+
+(define_insn "*truncdfsf_fast_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm,x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f  ,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,ssecvt")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
+(define_insn "*truncdfsf_fast_sse"
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_fast_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,Y2 ,?f,?x,?*r")
+	(float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand"     "=X,X  ,m ,m ,m"))]
+  "TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "type" "fmov,ssecvt,multi,multi,multi")
+   (set_attr "unit" "*,*,i387,i387,i387")
+   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?f,?x,?*r")
+	(float_truncate:SF
+	  (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand"     "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+	(float_truncate:SF
+	  (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_80387
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !TARGET_MIX_SSE_I387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+	(float_truncate:SF
+	 (match_operand:DF 1 "fp_register_operand" "")))
+   (clobber (match_operand 2 "" ""))]
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
+})
+
+;; Conversion from XFmode to {SF,DF}mode
+
+(define_expand "truncxf<mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "")
+		   (float_truncate:MODEF
+		     (match_operand:XF 1 "register_operand" "")))
+	      (clobber (match_dup 2))])]
+  "TARGET_80387"
+{
+  if (flag_unsafe_math_optimizations)
+    {
+      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
+      if (reg != operands[0])
+	emit_move_insn (operands[0], reg);
+      DONE;
+    }
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (<MODE>mode, slot);
+    }
+})
+
+(define_insn "*truncxfsf2_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+	(float_truncate:SF
+	  (match_operand:XF 1 "register_operand"   "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand"   "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncxfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r")
+	(float_truncate:DF
+	  (match_operand:XF 1 "register_operand"   "f ,f ,f  ,f")))
+   (clobber (match_operand:DF 2 "memory_operand"   "=X,m ,m  ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "DF")])
+
+(define_insn "truncxf<mode>2_i387_noop"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+	(float_truncate:MODEF
+	  (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*truncxf<mode>2_i387"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+	(float_truncate:MODEF
+	  (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float_truncate:MODEF
+	  (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+  "TARGET_80387 && reload_completed"
+  [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:MODEF 0 "memory_operand" "")
+	(float_truncate:MODEF
+	  (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+  "TARGET_80387"
+  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]
+  "")
+
+;; Signed conversion to DImode.
+
+(define_expand "fix_truncxfdi2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+                   (fix:DI (match_operand:XF 1 "register_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+(define_expand "fix_trunc<mode>di2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+                   (fix:DI (match_operand:MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+  if (TARGET_FISTTP
+      && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+   {
+     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+     emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+     if (out != operands[0])
+	emit_move_insn (operands[0], out);
+     DONE;
+   }
+})
+
+;; Signed conversion to SImode.
+
+(define_expand "fix_truncxfsi2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+                   (fix:SI (match_operand:XF 1 "register_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+(define_expand "fix_trunc<mode>si2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	           (fix:SI (match_operand:MODEF 1 "register_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+  if (TARGET_FISTTP
+      && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  if (SSE_FLOAT_MODE_P (<MODE>mode))
+   {
+     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+     emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+     if (out != operands[0])
+	emit_move_insn (operands[0], out);
+     DONE;
+   }
+})
+
+;; Signed conversion to HImode.
+
+(define_expand "fix_trunc<mode>hi2"
+  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	           (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (unsigned_fix:SI
+	    (match_operand:MODEF 1 "nonimmediate_operand" "")))
+     (use (match_dup 2))
+     (clobber (match_scratch:<ssevecmode> 3 ""))
+     (clobber (match_scratch:<ssevecmode> 4 ""))])]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  enum machine_mode mode = <MODE>mode;
+  enum machine_mode vecmode = <ssevecmode>mode;
+  REAL_VALUE_TYPE TWO31r;
+  rtx two31;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  real_ldexp (&TWO31r, &dconst1, 31);
+  two31 = const_double_from_real_value (TWO31r, mode);
+  two31 = ix86_build_const_vector (mode, true, two31);
+  operands[2] = force_reg (vecmode, two31);
+})
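+
+;; The actual code is produced by ix86_split_convert_uns_si_sse (used by
+;; the pattern below); as a sketch, with x the input and two31 the vector
+;; constant built above (see i386.c for the authoritative sequence):
+;;	mask    = (x >= two31)		; all-ones or all-zeros
+;;	x      -= mask ? two31 : 0	; bring x into signed range
+;;	result  = (int32_t) x		; cvtts[sd]2si
+;;	result ^= mask << 31		; put the unsigned high bit back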
+
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+	(unsigned_fix:SI
+	  (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
+   (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
+   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_convert_uns_si_sse (operands);
+  DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion, which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
+
+(define_expand "fixuns_trunc<mode>hi2"
+  [(set (match_dup 2)
+	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "")))
+   (set (match_operand:HI 0 "nonimmediate_operand" "")
+	(subreg:HI (match_dup 2) 0))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "operands[2] = gen_reg_rtx (SImode);")
+
+;; When SSE is available, it is always faster to use it!
+(define_insn "fix_trunc<mode>di_sse"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
+
+(define_insn "fix_trunc<mode>si_sse"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)
+   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
+
+;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
+(define_peephole2
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(match_operand:MODEF 1 "memory_operand" ""))
+   (set (match_operand:SSEMODEI24 2 "register_operand" "")
+	(fix:SSEMODEI24 (match_dup 0)))]
+  "TARGET_SHORTEN_X87_SSE
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
+  "")
+
+;; Avoid vector decoded forms of the instruction.
+(define_peephole2
+  [(match_scratch:DF 2 "Y2")
+   (set (match_operand:SSEMODEI24 0 "register_operand" "")
+	(fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
+  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+  "")
+
+(define_peephole2
+  [(match_scratch:SF 2 "x")
+   (set (match_operand:SSEMODEI24 0 "register_operand" "")
+	(fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
+  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+  "")
+
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(fix:X87MODEI (match_operand 1 "register_operand" "")))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+	 && (TARGET_64BIT || <MODE>mode != DImode))
+	&& TARGET_SSE_MATH)
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
+							    operands[1],
+							    operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp"
+  [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
+	(fix:X87MODEI (match_operand 1 "register_operand" "f")))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+	 && (TARGET_64BIT || <MODE>mode != DImode))
+	&& TARGET_SSE_MATH)"
+  "* return output_fix_trunc (insn, operands, 1);"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+	(fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+	&& (TARGET_64BIT || <MODE>mode != DImode))
+	&& TARGET_SSE_MATH)"
+  "#"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI 0 "register_operand" "")
+	(fix:X87MODEI (match_operand 1 "register_operand" "")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
+	      (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI 0 "memory_operand" "")
+	(fix:X87MODEI (match_operand 1 "register_operand" "")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
+	      (clobber (match_dup 3))])]
+  "")
+
+;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for a description
+;; of the machinery.  Note the clobber of FLAGS_REG: in the i387 control
+;; word calculation (inserted by LCM in the mode switching pass),
+;; FLAGS_REG-clobbering insns may be used.  See the
+;; emit_i387_cw_initialization () function in i386.c.
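+;;
+;; The generated code is the classic control-word dance; as a sketch
+;; (the slot names are illustrative):
+;;	fnstcw	stored_cw		; save the current control word
+;;	movzwl	stored_cw, %eax
+;;	orw	$0x0c00, %ax		; rounding control = truncate
+;;	movw	%ax, trunc_cw
+;;	fldcw	trunc_cw		; switch to truncating mode
+;;	fistpl	dest			; store the integer result
+;;	fldcw	stored_cw		; restore the original control word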
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(fix:X87MODEI (match_operand 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+	 && (TARGET_64BIT || <MODE>mode != DImode))
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+					 operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+						     operands[2], operands[3],
+						     operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_truncdi_i387"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(fix:DI (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fix_truncdi_i387_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(fix:DI (match_operand 1 "register_operand" "f,f")))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(fix:DI (match_operand 1 "register_operand" "")))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(fix:DI (match_operand 1 "register_operand" "")))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fix_trunc<mode>_i387"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+	(fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])]
+  "")
+
+(define_insn "x86_fnstcw_1"
+  [(set (match_operand:HI 0 "memory_operand" "=m")
+	(unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+  "TARGET_80387"
+  "fnstcw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "HI")
+   (set_attr "unit" "i387")])
+
+(define_insn "x86_fldcw_1"
+  [(set (reg:HI FPCR_REG)
+	(unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
+  "TARGET_80387"
+  "fldcw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "HI")
+   (set_attr "unit" "i387")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])
+
+;; Conversion between fixed point and floating point.
+
+;; Even though we only accept memory inputs, the backend _really_
+;; wants to be able to do this between registers.
+
+(define_expand "floathi<mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "")
+
+;; Pre-reload splitter to add a memory clobber to the pattern.
+(define_insn_and_split "*floathi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:HI 1 "register_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+	      (float:X87MODEF (match_dup 1)))
+   (clobber (match_dup 2))])]
+  "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);")
+
+(define_insn "*floathi<mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:HI 2 "memory_operand" "=m,m"))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "#"
+  [(set_attr "type" "fmov,multi")
+   (set_attr "mode" "<MODE>")
+   (set_attr "unit" "*,i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floathi<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:HI 1 "register_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:HI 1 "memory_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+   "TARGET_80387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387)
+    && reload_completed"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
+(define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+       && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)"
+  "")
+
+;; Pre-reload splitter to add a memory clobber to the pattern.
+(define_insn_and_split "*float<SSEMODEI24:mode><X87MODEF:mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))]
+  "((TARGET_80387
+     && (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+	   && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+	 || TARGET_MIX_SSE_I387))
+    || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+	&& SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+	&& ((<SSEMODEI24:MODE>mode == SImode
+	     && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS
+	     && optimize_function_for_speed_p (cfun)
+	     && flag_trapping_math)
+	    || !(TARGET_INTER_UNIT_CONVERSIONS
+	         || optimize_function_for_size_p (cfun)))))
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
+	      (clobber (match_dup 2))])]
+{
+  operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+  /* Avoid store forwarding (partial memory) stall penalty
+     by passing DImode value through XMM registers.  */
+  if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
+      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+      && optimize_function_for_speed_p (cfun))
+    {
+      emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+							    operands[1],
+							    operands[2]));
+      DONE;
+    }
+})
+
+(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
+	(float:MODEF
+	  (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
+   (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<ssevecmode>")
+   (set_attr "unit" "*,i387,*,*,*")
+   (set_attr "athlon_decode" "*,*,double,direct,double")
+   (set_attr "amdfam10_decode" "*,*,vector,double,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+	(float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "@
+   fild%z1\t%1
+   #"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "<MODE>,<ssevecmode>")
+   (set_attr "unit" "i387,*")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387"
+  "#"
+  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "*,i387,*,*")
+   (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && TARGET_INTER_UNIT_CONVERSIONS
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "@
+   fild%z1\t%1
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "fmov,sseicvt,sseicvt")
+   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "athlon_decode" "*,double,direct")
+   (set_attr "amdfam10_decode" "*,vector,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "@
+   fild%z1\t%1
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x")
+	(float:MODEF
+	  (match_operand:SI 1 "nonimmediate_operand" "r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
+   (set_attr "athlon_decode" "double,direct,double")
+   (set_attr "amdfam10_decode" "vector,double,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(float:MODEF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+				     <MODE>mode, 0);
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+    {
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+				  CONST0_RTX (V4SImode), operands[1]));
+    }
+  /* We can ignore a possible trapping value in the
+     high part of an SSE register for non-trapping math.  */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
+    {
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_sse2_loadld (operands[4],
+				  CONST0_RTX (V4SImode), operands[2]));
+    }
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+				     <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+  emit_insn (gen_sse2_loadld (operands[4],
+			      CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SI 1 "register_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+				     <MODE>mode, 0);
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+    {
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+				  CONST0_RTX (V4SImode), operands[1]));
+    }
+  /* We can ignore a possible trapping value in the
+     high part of an SSE register for non-trapping math.  */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
+    gcc_unreachable ();
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SI 1 "memory_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+				     <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+  emit_insn (gen_sse2_loadld (operands[4],
+			      CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
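+;; Without inter-unit conversions, the integer input must go through
+;; memory, e.g. (a sketch):
+;;	movl	%eax, slot
+;;	cvtsi2ss slot, %xmm0
+;; (cvtsi2sd for DFmode) rather than a direct `cvtsi2ss %eax, %xmm0';
+;; the splitter below spills register inputs accordingly.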
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(float:MODEF
+	  (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+	   && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(float:X87MODEF
+	  (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))]
+  "TARGET_80387"
+  "@
+   fild%z1\t%1
+   #"
+  [(set_attr "type" "fmov,multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "*,i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(float:X87MODEF
+	  (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+  "TARGET_80387"
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing the DImode value through XMM registers.
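+;;
+;; The emitted sequence is roughly (registers illustrative):
+;;	movd	%eax, %xmm0	; low 32 bits
+;;	movd	%edx, %xmm1	; high 32 bits
+;;	punpckldq %xmm1, %xmm0	; assemble the 64-bit value
+;;	movq	%xmm0, slot	; one full-width store, so the
+;;	fild	slot		; following load forwards cleanly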
+
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(float:X87MODEF
+	  (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=X,x"))
+   (clobber (match_scratch:V4SI 4 "=X,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode value arrives in a pair of integer registers
+     (e.g. %edx:%eax).  Assemble the 64-bit DImode value in an xmm
+     register.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; the SImode value to DImode through an XMM register instead of pushing
+;; two SImode values to the stack.  Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization.  Also note that fild
+;; loads from memory only.
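+;;
+;; In rough C terms the transformation is
+;;	d = (double) (int64_t) (uint64_t) u;
+;; the value is zero-extended into the 64-bit slot, where it is always
+;; non-negative, so the signed 64-bit load done by fild yields the
+;; correct unsigned result.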
+
+(define_insn "*floatunssi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(unsigned_float:X87MODEF
+	  (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:SI 3 "=X,x"))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(unsigned_float:X87MODEF
+	  (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0)
+	(float:X87MODEF (match_dup 2)))]
+  "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(unsigned_float:X87MODEF
+	  (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+	(float:X87MODEF (match_dup 2)))]
+{
+  emit_move_insn (operands[3], operands[1]);
+  operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
+(define_expand "floatunssi<mode>2"
+  [(parallel
+     [(set (match_operand:X87MODEF 0 "register_operand" "")
+	   (unsigned_float:X87MODEF
+	     (match_operand:SI 1 "nonimmediate_operand" "")))
+      (clobber (match_dup 2))
+      (clobber (match_scratch:SI 3 ""))])]
+  "!TARGET_64BIT
+   && ((TARGET_80387 && TARGET_SSE)
+       || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    {
+      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+      DONE;
+    }
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (DImode, slot);
+    }
+})
+
+(define_expand "floatunsdisf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT && TARGET_SSE_MATH"
+  "x86_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsdidf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+  "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+   && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  if (TARGET_64BIT)
+    x86_emit_floatuns (operands);
+  else
+    ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+  DONE;
+})
+
+;; Add instructions
+
+;; %%% splits for addti3
+
+(define_expand "addti3"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+		 (match_operand:TI 2 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
+
+(define_insn "*addti3_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+	(plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
+		 (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+		 (match_operand:TI 2 "x86_64_general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+					  UNSPEC_ADD_CARRY))
+	      (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+				     (match_dup 4))
+			    (match_dup 5)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
+
+;; %%% splits for addsidi3
+;  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+;	(plus:DI (match_operand:DI 1 "general_operand" "")
+;		 (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))]
+
+(define_expand "adddi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		 (match_operand:DI 2 "x86_64_general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;")
+
+(define_insn "*adddi3_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		 (match_operand:DI 2 "general_operand" "roiF,riF")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		 (match_operand:DI 2 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+					  UNSPEC_ADD_CARRY))
+	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+				     (match_dup 4))
+			    (match_dup 5)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_di (&operands[0], 3, &operands[0], &operands[3]);")
+
+(define_insn "adddi3_carry_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	  (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+			    (match_operand:DI 1 "nonimmediate_operand" "%0,0"))
+		   (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+  "adc{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*adddi3_cc_rex64"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		    (match_operand:DI 2 "x86_64_general_operand" "re,rm")]
+		   UNSPEC_ADD_CARRY))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+  "add{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*<plusminus_insn><mode>3_cc_overflow"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	    (plusminus:SWI
+		(match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+		(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+	    (match_dup 1)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(plusminus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_cconly_overflow"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+		(plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0")
+			  (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+		(match_dup 1)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sub<mode>3_cconly_overflow"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	     (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+			(match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+	     (match_dup 0)))]
+  ""
+  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	    (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+			  (match_operand:SI 2 "general_operand" "g"))
+	    (match_dup 1)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "addqi3_carry"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	  (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+			    (match_operand:QI 1 "nonimmediate_operand" "%0,0"))
+		   (match_operand:QI 2 "general_operand" "qn,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+  "adc{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "QI")])
+
+(define_insn "addhi3_carry"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	  (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+			    (match_operand:HI 1 "nonimmediate_operand" "%0,0"))
+		   (match_operand:HI 2 "general_operand" "rn,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, HImode, operands)"
+  "adc{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "HI")])
+
+(define_insn "addsi3_carry"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	  (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+			    (match_operand:SI 1 "nonimmediate_operand" "%0,0"))
+		   (match_operand:SI 2 "general_operand" "ri,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "adc{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_carry_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	  (zero_extend:DI
+	    (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+			      (match_operand:SI 1 "nonimmediate_operand" "%0"))
+		     (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "adc{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cc"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
+		    (match_operand:SI 2 "general_operand" "ri,rm")]
+		   UNSPEC_ADD_CARRY))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "add{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "addqi3_cc"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+		    (match_operand:QI 2 "general_operand" "qn,qm")]
+		   UNSPEC_ADD_CARRY))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	(plus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+  "add{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		 (match_operand:SI 2 "general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")
+
+(define_insn "*lea_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(match_operand:SI 1 "no_seg_address_operand" "p"))]
+  "!TARGET_64BIT"
+  "lea{l}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_rex64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))]
+  "TARGET_64BIT"
+  "lea{l}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	 (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
+  "TARGET_64BIT"
+  "lea{l}\t{%a1, %k0|%k0, %a1}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lea_2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(match_operand:DI 1 "no_seg_address_operand" "p"))]
+  "TARGET_64BIT"
+  "lea{q}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")])
+
+;; The lea patterns for non-Pmodes need to be matched by several
+;; insns that splitters later convert to a real lea.
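+;;
+;; For example, a HImode computation r = x + y + 4 can be emitted as
+;;	leal	4(%eax,%ebx), %ecx
+;; on the full SImode registers (choices illustrative); only the low 16
+;; bits of the result matter, so computing in the wider mode is safe.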
+
+(define_insn_and_split "*lea_general_1"
+  [(set (match_operand 0 "register_operand" "=r")
+	(plus (plus (match_operand 1 "index_register_operand" "l")
+		    (match_operand 2 "register_operand" "r"))
+	      (match_operand 3 "immediate_operand" "i")))]
+  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+    || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && GET_MODE (operands[0]) == GET_MODE (operands[2])
+   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+       || GET_MODE (operands[3]) == VOIDmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_lowpart (Pmode, operands[2]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+  		      operands[3]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l")
+			    (match_operand:SI 2 "register_operand" "r"))
+		   (match_operand:SI 3 "immediate_operand" "i"))))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+						     (match_dup 2))
+					    (match_dup 3)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_lowpart (Pmode, operands[2]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2"
+  [(set (match_operand 0 "register_operand" "=r")
+	(plus (mult (match_operand 1 "index_register_operand" "l")
+		    (match_operand 2 "const248_operand" "i"))
+	      (match_operand 3 "nonmemory_operand" "ri")))]
+  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+    || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+       || GET_MODE (operands[3]) == VOIDmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+  		      operands[3]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l")
+			    (match_operand:SI 2 "const248_operand" "n"))
+		   (match_operand:SI 3 "nonmemory_operand" "ri"))))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
+						     (match_dup 2))
+					    (match_dup 3)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3"
+  [(set (match_operand 0 "register_operand" "=r")
+	(plus (plus (mult (match_operand 1 "index_register_operand" "l")
+			  (match_operand 2 "const248_operand" "i"))
+		    (match_operand 3 "register_operand" "r"))
+	      (match_operand 4 "immediate_operand" "i")))]
+  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+    || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && GET_MODE (operands[0]) == GET_MODE (operands[3])"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  operands[4] = gen_lowpart (Pmode, operands[4]);
+  pat = gen_rtx_PLUS (Pmode,
+  		      gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
+		      					 operands[2]),
+				    operands[3]),
+  		      operands[4]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI (plus:SI (mult:SI
+			      (match_operand:SI 1 "index_register_operand" "l")
+			      (match_operand:SI 2 "const248_operand" "n"))
+			    (match_operand:SI 3 "register_operand" "r"))
+		   (match_operand:SI 4 "immediate_operand" "i"))))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
+							      (match_dup 2))
+						     (match_dup 3))
+					    (match_dup 4)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[3] = gen_lowpart (Pmode, operands[3]);
+  operands[4] = gen_lowpart (Pmode, operands[4]);
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
+(define_insn "*adddi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
+		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{q}\t{%a2, %0|%0, %a2}";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{q}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
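+      /* E.g. `addq $128, %rax' needs a 32-bit immediate, while the
+	 equivalent `subq $-128, %rax' fits in a sign-extended 8-bit
+	 immediate.  */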
+      if (CONST_INT_P (operands[2])
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{q}\t{%2, %0|%0, %2}";
+        }
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "lea")
+	    ; Current assemblers are broken and do not allow @GOTOFF in
+	    ; anything but a memory context.
+	    (match_operand:DI 2 "pic_symbolic_operand" "")
+	      (const_string "lea")
+	    (match_operand:DI 2 "incdec_operand" "")
+	      (const_string "incdec")
+	   ]
+	   (const_string "alu")))
+   (set_attr "mode" "DI")])
+
+;; Convert add to the lea pattern to avoid flags dependency.
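+;; For example, instead of
+;;	movq	%rdi, %rax
+;;	addq	$8, %rax
+;; we emit
+;;	leaq	8(%rdi), %rax
+;; which leaves FLAGS untouched (registers illustrative).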
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(plus:DI (match_operand:DI 1 "register_operand" "")
+		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(plus:DI (match_dup 1)
+		 (match_dup 2)))]
+  "")
+
+(define_insn "*adddi_2_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		   (match_operand:DI 2 "x86_64_general_operand" "rme,re"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, DImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{q}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ??? We ought to handle the 32-bit case here too
+	 - do we need a new constraint?  */
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{q}\t{%2, %0|%0, %2}";
+        }
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:DI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*adddi_3_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme"))
+		 (match_operand:DI 1 "x86_64_general_operand" "%0")))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCZmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{q}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ??? We ought to handle the 32-bit case here too
+	 - do we need a new constraint?  */
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{q}\t{%2, %0|%0, %2}";
+        }
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:DI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec, as done by peephole2.  This
+; pattern is then matched.  We can't accept a general immediate, because
+; in case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons that do not depend on it.
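+; E.g. when the register is dead afterwards, peephole2 can replace
+;	cmpq	$1, %rax
+; with
+;	decq	%rax
+; which sets the same flags except that dec leaves the carry flag
+; untouched, hence the restriction above.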
+(define_insn "*adddi_4_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:DI 1 "nonimmediate_operand" "0")
+		 (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+   (clobber (match_scratch:DI 0 "=rm"))]
+  "TARGET_64BIT
+   &&  ix86_match_ccmode (insn, CCGCmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx)
+        return "inc{q}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == const1_rtx);
+          return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if ((INTVAL (operands[2]) == -128
+	   || (INTVAL (operands[2]) > 0
+	       && INTVAL (operands[2]) != 128))
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+	return "sub{q}\t{%2, %0|%0, %2}";
+      operands[2] = GEN_INT (-INTVAL (operands[2]));
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:DI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*adddi_5_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+		   (match_operand:DI 2 "x86_64_general_operand" "rme"))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{q}\t%0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{q}\t{%2, %0|%0, %2}";
+        }
+      return "add{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:DI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "DI")])
+
+
+(define_insn "*addsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
+	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
+		 (match_operand:SI 2 "general_operand" "g,ri,li")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{l}\t{%a2, %0|%0, %a2}";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%0";
+      else
+	{
+  	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %0|%0, %2}";
+        }
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "lea")
+	    ; Current assemblers are broken and do not allow @GOTOFF in
+	    ; anything but a memory context.
+	    (match_operand:SI 2 "pic_symbolic_operand" "")
+	      (const_string "lea")
+	    (match_operand:SI 2 "incdec_operand" "")
+	      (const_string "incdec")
+	   ]
+	   (const_string "alu")))
+   (set_attr "mode" "SI")])
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(plus (match_operand 1 "register_operand" "")
+              (match_operand 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  rtx pat;
+  /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
+     may confuse gen_lowpart.  */
+  if (GET_MODE (operands[0]) != Pmode)
+    {
+      operands[1] = gen_lowpart (Pmode, operands[1]);
+      operands[2] = gen_lowpart (Pmode, operands[2]);
+    }
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; It may seem that a nonimmediate operand is the proper one for operand 1.
+;; The addsi_1 pattern allows a nonimmediate operand at that place, and
+;; we take care in ix86_binary_operator_ok not to allow two memory
+;; operands, so proper swapping will be done in reload.  This allows
+;; patterns constructed from addsi_1 to match.
+(define_insn "addsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+		   (match_operand:SI 2 "general_operand" "g,li"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{l}\t{%a2, %k0|%k0, %a2}";
+
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%k0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %k0|%k0, %2}";
+        }
+      return "add{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+	    ; Current assemblers are broken and do not allow @GOTOFF in
+	    ; anything but a memory context.
+	    (match_operand:SI 2 "pic_symbolic_operand" "")
+	      (const_string "lea")
+	    (match_operand:SI 2 "incdec_operand" "")
+	      (const_string "incdec")
+	   ]
+	   (const_string "alu")))
+   (set_attr "mode" "SI")])
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "register_operand" "")
+		   (match_operand:SI 2 "nonmemory_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_lowpart (Pmode, operands[2]);
+})
+
+(define_insn "*addsi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+		   (match_operand:SI 2 "general_operand" "g,ri"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %0|%0, %2}";
+        }
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		   (match_operand:SI 2 "general_operand" "g"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%k0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %k0|%k0, %2}";
+        }
+      return "add{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*addsi_3"
+  [(set (reg FLAGS_REG)
+	(compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
+		 (match_operand:SI 1 "nonimmediate_operand" "%0")))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCZmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %0|%0, %2}";
+        }
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_3_zext"
+  [(set (reg FLAGS_REG)
+	(compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
+		 (match_operand:SI 1 "nonimmediate_operand" "%0")))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+   && ix86_binary_operator_ok (PLUS, SImode, operands)
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%k0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %k0|%k0, %2}";
+        }
+      return "add{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec, as done by peephole2.  This
+; pattern is then matched.  We can't accept a general immediate, because
+; in case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x80000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion is
+; valid only for comparisons that do not depend on it.
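+; E.g. `cmpl $1, %eax' can likewise become `decl %eax' when %eax is
+; dead afterwards.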
+(define_insn "*addsi_4"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 1 "nonimmediate_operand" "0")
+		 (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_scratch:SI 0 "=rm"))]
+  "ix86_match_ccmode (insn, CCGCmode)
+   && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx)
+        return "inc{l}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == const1_rtx);
+          return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if ((INTVAL (operands[2]) == -128
+	   || (INTVAL (operands[2]) > 0
+	       && INTVAL (operands[2]) != 128)))
+	return "sub{l}\t{%2, %0|%0, %2}";
+      operands[2] = GEN_INT (-INTVAL (operands[2]));
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*addsi_5"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		   (match_operand:SI 2 "general_operand" "g"))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context.  */
+   && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "sub{l}\t{%2, %0|%0, %2}";
+        }
+      return "add{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "SI")])
+
+(define_expand "addhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		 (match_operand:HI 2 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;")
+
+;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable the incb %ah
+;; style optimizations enabled by define_splits.  These are not important
+;; for PII, and are in fact harmful because of partial register stalls.
+
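+;; In the third alternative below ("r", "ln") the output is left as "#"
+;; and the insn is later split into an SImode lea, e.g.
+;;	leal 4(%ecx), %eax
+;; which is why that alternative's mode attribute is SI.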
+(define_insn "*addhi_1_lea"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
+		 (match_operand:HI 2 "general_operand" "rn,rm,ln")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{w}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{w}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{w}\t{%2, %0|%0, %2}";
+	}
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (eq_attr "alternative" "2")
+	(const_string "lea")
+	(if_then_else (match_operand:HI 2 "incdec_operand" "")
+	   (const_string "incdec")
+	   (const_string "alu"))))
+   (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*addhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+		 (match_operand:HI 2 "general_operand" "rn,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{w}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{w}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{w}\t{%2, %0|%0, %2}";
+	}
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "HI")])
+
+(define_insn "*addhi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+		   (match_operand:HI 2 "general_operand" "rmn,rn"))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+	(plus:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{w}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{w}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{w}\t{%2, %0|%0, %2}";
+	}
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "HI")])
+
+(define_insn "*addhi_3"
+  [(set (reg FLAGS_REG)
+	(compare (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
+		 (match_operand:HI 1 "nonimmediate_operand" "%0")))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCZmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{w}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{w}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{w}\t{%2, %0|%0, %2}";
+	}
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "HI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addhi_4"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:HI 1 "nonimmediate_operand" "0")
+		 (match_operand:HI 2 "const_int_operand" "n")))
+   (clobber (match_scratch:HI 0 "=rm"))]
+  "ix86_match_ccmode (insn, CCGCmode)
+   && (INTVAL (operands[2]) & 0xffff) != 0x8000"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx)
+        return "inc{w}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == const1_rtx);
+          return "dec{w}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if ((INTVAL (operands[2]) == -128
+	   || (INTVAL (operands[2]) > 0
+	       && INTVAL (operands[2]) != 128)))
+	return "sub{w}\t{%2, %0|%0, %2}";
+      operands[2] = GEN_INT (-INTVAL (operands[2]));
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "HI")])
+
+
+(define_insn "*addhi_5"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+		   (match_operand:HI 2 "general_operand" "rmn"))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{w}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{w}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{w}\t{%2, %0|%0, %2}";
+	}
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:HI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "HI")])
+
+(define_expand "addqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		 (match_operand:QI 2 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+(define_insn "*addqi_1_lea"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
+	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
+		 (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+  int widen = (which_alternative == 2);
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  if (widen)
+	    return "sub{l}\t{%2, %k0|%k0, %2}";
+	  else
+	    return "sub{b}\t{%2, %0|%0, %2}";
+	}
+      if (widen)
+        return "add{l}\t{%k2, %k0|%k0, %k2}";
+      else
+        return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (eq_attr "alternative" "3")
+	(const_string "lea")
+	(if_then_else (match_operand:QI 2 "incdec_operand" "")
+	   (const_string "incdec")
+	   (const_string "alu"))))
+   (set_attr "mode" "QI,QI,SI,SI")])
+
+(define_insn "*addqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		 (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+  int widen = (which_alternative == 2);
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+	 Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+		  && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  if (widen)
+	    return "sub{l}\t{%2, %k0|%k0, %2}";
+	  else
+	    return "sub{b}\t{%2, %0|%0, %2}";
+	}
+      if (widen)
+        return "add{l}\t{%k2, %k0|%k0, %k2}";
+      else
+        return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*addqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+	(plus:QI (match_dup 0)
+		 (match_operand:QI 1 "general_operand" "qn,qnm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[1] == const1_rtx)
+	return "inc{b}\t%0";
+      else
+	{
+	  gcc_assert (operands[1] == constm1_rtx);
+	  return "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
+      if (CONST_INT_P (operands[1])
+	  && INTVAL (operands[1]) < 0)
+	{
+	  operands[1] = GEN_INT (-INTVAL (operands[1]));
+	  return "sub{b}\t{%1, %0|%0, %1}";
+	}
+      return "add{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 1 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu1")))
+   (set (attr "memory")
+     (if_then_else (match_operand 1 "memory_operand" "")
+        (const_string "load")
+        (const_string "none")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*addqi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+		   (match_operand:QI 2 "general_operand" "qmn,qn"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+	(plus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{b}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx
+		      || (CONST_INT_P (operands[2])
+		          && INTVAL (operands[2]) == 255));
+	  return "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
+      if (CONST_INT_P (operands[2])
+          && INTVAL (operands[2]) < 0)
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{b}\t{%2, %0|%0, %2}";
+	}
+      return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*addqi_3"
+  [(set (reg FLAGS_REG)
+	(compare (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
+		 (match_operand:QI 1 "nonimmediate_operand" "%0")))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCZmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{b}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx
+		      || (CONST_INT_P (operands[2])
+			  && INTVAL (operands[2]) == 255));
+	  return "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
+      if (CONST_INT_P (operands[2])
+          && INTVAL (operands[2]) < 0)
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{b}\t{%2, %0|%0, %2}";
+	}
+      return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addqi_4"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:QI 1 "nonimmediate_operand" "0")
+		 (match_operand:QI 2 "const_int_operand" "n")))
+   (clobber (match_scratch:QI 0 "=qm"))]
+  "ix86_match_ccmode (insn, CCGCmode)
+   && (INTVAL (operands[2]) & 0xff) != 0x80"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == constm1_rtx
+	  || (CONST_INT_P (operands[2])
+	      && INTVAL (operands[2]) == 255))
+        return "inc{b}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == const1_rtx);
+          return "dec{b}\t%0";
+	}
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (INTVAL (operands[2]) < 0)
+        {
+          operands[2] = GEN_INT (-INTVAL (operands[2]));
+          return "add{b}\t{%2, %0|%0, %2}";
+        }
+      return "sub{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+
+(define_insn "*addqi_5"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+		   (match_operand:QI 2 "general_operand" "qmn"))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{b}\t%0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx
+		      || (CONST_INT_P (operands[2])
+			  && INTVAL (operands[2]) == 255));
+	  return "dec{b}\t%0";
+	}
+
+    default:
+      /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
+      if (CONST_INT_P (operands[2])
+          && INTVAL (operands[2]) < 0)
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{b}\t{%2, %0|%0, %2}";
+	}
+      return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+
+(define_insn "addqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(plus:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (match_operand:QI 2 "general_operand" "Qmn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{b}\t%h0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx
+		      || (CONST_INT_P (operands[2])
+			  && INTVAL (operands[2]) == 255));
+          return "dec{b}\t%h0";
+	}
+
+    default:
+      return "add{b}\t{%2, %h0|%h0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(plus:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (match_operand:QI 2 "nonmemory_operand" "Qn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{b}\t%h0";
+      else
+        {
+	  gcc_assert (operands[2] == constm1_rtx
+		      || (CONST_INT_P (operands[2])
+			  && INTVAL (operands[2]) == 255));
+          return "dec{b}\t%h0";
+        }
+
+    default:
+      return "add{b}\t{%2, %h0|%h0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 2 "incdec_operand" "")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(plus:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "%0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extract:SI
+	    (match_operand 2 "ext_register_operand" "Q")
+	    (const_int 8)
+	    (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "add{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
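+;; Both addends of *addqi_ext_2 live in the high byte of their registers,
+;; so the emitted instruction is of the form
+;;	addb %ch, %ah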
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "addxf3"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(plus:XF (match_operand:XF 1 "register_operand" "")
+		 (match_operand:XF 2 "register_operand" "")))]
+  "TARGET_80387"
+  "")
+
+(define_expand "add<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(plus:MODEF (match_operand:MODEF 1 "register_operand" "")
+		    (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "")
+
+;; Subtract instructions
+
+;; %%% splits for subditi3
+
+(define_expand "subti3"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+		  (match_operand:TI 2 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
+
+(define_insn "*subti3_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+	(minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+		  (match_operand:TI 2 "x86_64_general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (minus:DI (match_dup 4)
+			     (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+				      (match_dup 5))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
+
+;; %%% splits for subsidi3
+
+(define_expand "subdi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		  (match_operand:DI 2 "x86_64_general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;")
+
+(define_insn "*subdi3_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+	(minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:DI 2 "general_operand" "roiF,riF")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		  (match_operand:DI 2 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (minus:SI (match_dup 4)
+			     (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+				      (match_dup 5))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_di (&operands[0], 3, &operands[0], &operands[3]);")
+
+(define_insn "subdi3_carry_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	  (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+	    (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+	       (match_operand:DI 2 "x86_64_general_operand" "re,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "sbb{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*subdi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "sub{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*subdi_2_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "sub{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*subdi_3_rex63"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		 (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "subqi3_carry"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	  (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+	    (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+	       (match_operand:QI 2 "general_operand" "qn,qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, QImode, operands)"
+  "sbb{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "QI")])
+
+(define_insn "subhi3_carry"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	  (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+	    (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+	       (match_operand:HI 2 "general_operand" "rn,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, HImode, operands)"
+  "sbb{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "HI")])
+
+(define_insn "subsi3_carry"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+	    (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+	       (match_operand:SI 2 "general_operand" "ri,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sbb{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
+(define_insn "subsi3_carry_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	  (zero_extend:DI
+	    (minus:SI (match_operand:SI 1 "register_operand" "0")
+	      (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+		 (match_operand:SI 2 "general_operand" "g")))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sbb{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		  (match_operand:SI 2 "general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;")
+
+(define_insn "*subsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:SI 2 "general_operand" "ri,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI (match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:SI 2 "general_operand" "ri,rm"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (minus:SI (match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "general_operand" "g"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI (match_dup 1)
+		    (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		 (match_operand:SI 2 "general_operand" "ri,rm")))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3_zext"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:SI 1 "register_operand" "0")
+		 (match_operand:SI 2 "general_operand" "g")))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI (match_dup 1)
+		    (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %1|%1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_expand "subhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		  (match_operand:HI 2 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;")
+
+(define_insn "*subhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	(minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:HI 2 "general_operand" "rn,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, HImode, operands)"
+  "sub{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*subhi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:HI 2 "general_operand" "rn,rm"))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	(minus:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, HImode, operands)"
+  "sub{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*subhi_3"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		 (match_operand:HI 2 "general_operand" "rn,rm")))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	(minus:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (MINUS, HImode, operands)"
+  "sub{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_expand "subqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		  (match_operand:QI 2 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;")
+
+(define_insn "*subqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	(minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:QI 2 "general_operand" "qn,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, QImode, operands)"
+  "sub{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*subqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+	(minus:QI (match_dup 0)
+		  (match_operand:QI 1 "general_operand" "qn,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "sub{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*subqi_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:QI 2 "general_operand" "qn,qm"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	(minus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, QImode, operands)"
+  "sub{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*subqi_3"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		 (match_operand:QI 2 "general_operand" "qn,qm")))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+	(minus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (MINUS, QImode, operands)"
+  "sub{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "subxf3"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(minus:XF (match_operand:XF 1 "register_operand" "")
+		  (match_operand:XF 2 "register_operand" "")))]
+  "TARGET_80387"
+  "")
+
+(define_expand "sub<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(minus:MODEF (match_operand:MODEF 1 "register_operand" "")
+		     (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "")
+
+;; Multiply instructions
+
+(define_expand "muldi3"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (match_operand:DI 1 "register_operand" "")
+			    (match_operand:DI 2 "x86_64_general_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+;; On AMDFAM10
+;; IMUL reg64, reg64, imm8 	Direct
+;; IMUL reg64, mem64, imm8 	VectorPath
+;; IMUL reg64, reg64, imm32 	Direct
+;; IMUL reg64, mem64, imm32 	VectorPath
+;; IMUL reg64, reg64 		Direct
+;; IMUL reg64, mem64 		Direct
+
+(define_insn "*muldi3_1_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+	(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
+		 (match_operand:DI 2 "x86_64_general_operand" "K,e,mr")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   imul{q}\t{%2, %1, %0|%0, %1, %2}
+   imul{q}\t{%2, %1, %0|%0, %1, %2}
+   imul{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")
+	       (and (eq_attr "alternative" "2")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "mode" "DI")])
+
+(define_expand "mulsi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (mult:SI (match_operand:SI 1 "register_operand" "")
+			    (match_operand:SI 2 "general_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+;; On AMDFAM10
+;; IMUL reg32, reg32, imm8 	Direct
+;; IMUL reg32, mem32, imm8 	VectorPath
+;; IMUL reg32, reg32, imm32 	Direct
+;; IMUL reg32, mem32, imm32 	VectorPath
+;; IMUL reg32, reg32 		Direct
+;; IMUL reg32, mem32 		Direct
+
+(define_insn "*mulsi3_1"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+	(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+		 (match_operand:SI 2 "general_operand" "K,i,mr")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   imul{l}\t{%2, %1, %0|%0, %1, %2}
+   imul{l}\t{%2, %1, %0|%0, %1, %2}
+   imul{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")
+	       (and (eq_attr "alternative" "2")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*mulsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+	(zero_extend:DI
+	  (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+		   (match_operand:SI 2 "general_operand" "K,i,mr"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+   imul{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")
+	       (and (eq_attr "alternative" "2")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "mode" "SI")])
+
+(define_expand "mulhi3"
+  [(parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (mult:HI (match_operand:HI 1 "register_operand" "")
+			    (match_operand:HI 2 "general_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_HIMODE_MATH"
+  "")
+
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 	VectorPath
+;; IMUL reg16, mem16, imm8 	VectorPath
+;; IMUL reg16, reg16, imm16 	VectorPath
+;; IMUL reg16, mem16, imm16 	VectorPath
+;; IMUL reg16, reg16 		Direct
+;; IMUL reg16, mem16 		Direct
+(define_insn "*mulhi3_1"
+  [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+	(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
+		 (match_operand:HI 2 "general_operand" "K,n,mr")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   imul{w}\t{%2, %1, %0|%0, %1, %2}
+   imul{w}\t{%2, %1, %0|%0, %1, %2}
+   imul{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1,2")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(eq_attr "alternative" "0,1")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "mode" "HI")])
+
+(define_expand "mulqi3"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (mult:QI (match_operand:QI 1 "nonimmediate_operand" "")
+			    (match_operand:QI 2 "register_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+  "")
+
+;; On AMDFAM10
+;; MUL reg8 	Direct
+;; MUL mem8 	Direct
+
+(define_insn "*mulqi3_1"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+		 (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{b}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "mode" "QI")])
+
+(define_expand "umulqihi3"
+  [(parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (mult:HI (zero_extend:HI
+			      (match_operand:QI 1 "nonimmediate_operand" ""))
+			    (zero_extend:HI
+			      (match_operand:QI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+  "")
+
+(define_insn "*umulqihi3_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+		 (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{b}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "mode" "QI")])
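+;; mul{b} multiplies %al by its operand and leaves the 16-bit product in
+;; %ax, which is why operand 0 is constrained to the "a" register.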
+
+(define_expand "mulqihi3"
+  [(parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))
+			    (sign_extend:HI (match_operand:QI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+  "")
+
+(define_insn "*mulqihi3_insn"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+		 (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{b}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "mode" "QI")])
+
+(define_expand "umulditi3"
+  [(parallel [(set (match_operand:TI 0 "register_operand" "")
+		   (mult:TI (zero_extend:TI
+			      (match_operand:DI 1 "nonimmediate_operand" ""))
+			    (zero_extend:TI
+			      (match_operand:DI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "*umulditi3_insn"
+  [(set (match_operand:TI 0 "register_operand" "=A")
+	(mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+		 (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{q}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "DI")])
+
+;; We can't use this pattern in 64-bit mode, since it results in two separate 32-bit registers.
+(define_expand "umulsidi3"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (zero_extend:DI
+			      (match_operand:SI 1 "nonimmediate_operand" ""))
+			    (zero_extend:DI
+			      (match_operand:SI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT"
+  "")
+
+(define_insn "*umulsidi3_insn"
+  [(set (match_operand:DI 0 "register_operand" "=A")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+		 (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
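+;; The one-operand mul{l} leaves the full 64-bit product in edx:eax; the
+;; "A" constraint on operand 0 describes exactly that register pair.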
+
+(define_expand "mulditi3"
+  [(parallel [(set (match_operand:TI 0 "register_operand" "")
+		   (mult:TI (sign_extend:TI
+			      (match_operand:DI 1 "nonimmediate_operand" ""))
+			    (sign_extend:TI
+			      (match_operand:DI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "*mulditi3_insn"
+  [(set (match_operand:TI 0 "register_operand" "=A")
+	(mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+		 (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{q}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "DI")])
+
+(define_expand "mulsidi3"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (sign_extend:DI
+			      (match_operand:SI 1 "nonimmediate_operand" ""))
+			    (sign_extend:DI
+			      (match_operand:SI 2 "register_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT"
+  "")
+
+(define_insn "*mulsidi3_insn"
+  [(set (match_operand:DI 0 "register_operand" "=A")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+		 (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
+
+(define_expand "umuldi3_highpart"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (truncate:DI
+		     (lshiftrt:TI
+		       (mult:TI (zero_extend:TI
+				  (match_operand:DI 1 "nonimmediate_operand" ""))
+				(zero_extend:TI
+				  (match_operand:DI 2 "register_operand" "")))
+		       (const_int 64))))
+	      (clobber (match_scratch:DI 3 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "*umuldi3_highpart_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (zero_extend:TI
+		       (match_operand:DI 1 "nonimmediate_operand" "%a"))
+		     (zero_extend:TI
+		       (match_operand:DI 2 "nonimmediate_operand" "rm")))
+	    (const_int 64))))
+   (clobber (match_scratch:DI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{q}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "DI")])
+
+(define_expand "umulsi3_highpart"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (truncate:SI
+		     (lshiftrt:DI
+		       (mult:DI (zero_extend:DI
+				  (match_operand:SI 1 "nonimmediate_operand" ""))
+				(zero_extend:DI
+				  (match_operand:SI 2 "register_operand" "")))
+		       (const_int 32))))
+	      (clobber (match_scratch:SI 3 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+(define_insn "*umulsi3_highpart_insn"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI
+		       (match_operand:SI 1 "nonimmediate_operand" "%a"))
+		     (zero_extend:DI
+		       (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	    (const_int 32))))
+   (clobber (match_scratch:SI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
+
+(define_insn "*umulsi3_highpart_zext"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+	(zero_extend:DI (truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI
+		       (match_operand:SI 1 "nonimmediate_operand" "%a"))
+		     (zero_extend:DI
+		       (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	    (const_int 32)))))
+   (clobber (match_scratch:SI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
+
+(define_expand "smuldi3_highpart"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (truncate:DI
+		     (lshiftrt:TI
+		       (mult:TI (sign_extend:TI
+				  (match_operand:DI 1 "nonimmediate_operand" ""))
+				(sign_extend:TI
+				  (match_operand:DI 2 "register_operand" "")))
+		       (const_int 64))))
+	      (clobber (match_scratch:DI 3 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "*smuldi3_highpart_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (sign_extend:TI
+		       (match_operand:DI 1 "nonimmediate_operand" "%a"))
+		     (sign_extend:TI
+		       (match_operand:DI 2 "nonimmediate_operand" "rm")))
+	    (const_int 64))))
+   (clobber (match_scratch:DI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{q}\t%2"
+  [(set_attr "type" "imul")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "DI")])
+
+(define_expand "smulsi3_highpart"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (truncate:SI
+		     (lshiftrt:DI
+		       (mult:DI (sign_extend:DI
+				  (match_operand:SI 1 "nonimmediate_operand" ""))
+				(sign_extend:DI
+				  (match_operand:SI 2 "register_operand" "")))
+		       (const_int 32))))
+	      (clobber (match_scratch:SI 3 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+(define_insn "*smulsi3_highpart_insn"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (sign_extend:DI
+		       (match_operand:SI 1 "nonimmediate_operand" "%a"))
+		     (sign_extend:DI
+		       (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	    (const_int 32))))
+   (clobber (match_scratch:SI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
+
+(define_insn "*smulsi3_highpart_zext"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+	(zero_extend:DI (truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (sign_extend:DI
+		       (match_operand:SI 1 "nonimmediate_operand" "%a"))
+		     (sign_extend:DI
+		       (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	    (const_int 32)))))
+   (clobber (match_scratch:SI 3 "=1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{l}\t%2"
+  [(set_attr "type" "imul")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "mode" "SI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "mulxf3"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(mult:XF (match_operand:XF 1 "register_operand" "")
+		 (match_operand:XF 2 "register_operand" "")))]
+  "TARGET_80387"
+  "")
+
+(define_expand "mul<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(mult:MODEF (match_operand:MODEF 1 "register_operand" "")
+		    (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "")
+
+;; SSE5 scalar multiply/add instructions are defined in sse.md.
+
+
+;; Divide instructions
+
+(define_insn "divqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(div:QI (match_operand:HI 1 "register_operand" "0")
+		(match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_insn "udivqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(udiv:QI (match_operand:HI 1 "register_operand" "0")
+		 (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "divxf3"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(div:XF (match_operand:XF 1 "register_operand" "")
+		(match_operand:XF 2 "register_operand" "")))]
+  "TARGET_80387"
+  "")
+
+(define_expand "divdf3"
+  [(set (match_operand:DF 0 "register_operand" "")
+ 	(div:DF (match_operand:DF 1 "register_operand" "")
+ 		(match_operand:DF 2 "nonimmediate_operand" "")))]
+   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+   "")
+
+(define_expand "divsf3"
+  [(set (match_operand:SF 0 "register_operand" "")
+	(div:SF (match_operand:SF 1 "register_operand" "")
+		(match_operand:SF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || TARGET_SSE_MATH"
+{
+  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swdivsf (operands[0], operands[1],
+			 operands[2], SFmode);
+      DONE;
+    }
+})
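+;; ix86_emit_swdivsf replaces the divide by a reciprocal estimate refined
+;; with one Newton-Raphson step, roughly
+;;	x0 = rcpss(b);  a/b ~ a * x0 * (2.0 - b * x0)
+;; which is why the transformation is gated behind the unsafe-math flags.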
+
+;; Remainder instructions.
+
+(define_expand "divmoddi4"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (div:DI (match_operand:DI 1 "register_operand" "")
+			   (match_operand:DI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:DI 3 "register_operand" "")
+		   (mod:DI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "")
+
+;; Allow the parameter to come in either eax or edx, to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of code.
+(define_insn "*divmoddi4_nocltd_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=&a,?a")
+	(div:DI (match_operand:DI 2 "register_operand" "1,0")
+		(match_operand:DI 3 "nonimmediate_operand" "rm,rm")))
+   (set (match_operand:DI 1 "register_operand" "=&d,&d")
+	(mod:DI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi4_cltd_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(div:DI (match_operand:DI 2 "register_operand" "a")
+		(match_operand:DI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 1 "register_operand" "=&d")
+	(mod:DI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi_noext_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(div:DI (match_operand:DI 1 "register_operand" "0")
+		(match_operand:DI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 3 "register_operand" "=d")
+	(mod:DI (match_dup 1) (match_dup 2)))
+   (use (match_operand:DI 4 "register_operand" "3"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "idiv{q}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(div:DI (match_operand:DI 1 "register_operand" "")
+		(match_operand:DI 2 "nonimmediate_operand" "")))
+   (set (match_operand:DI 3 "register_operand" "")
+	(mod:DI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (match_dup 3)
+		   (ashiftrt:DI (match_dup 4) (const_int 63)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+	           (div:DI (reg:DI 0) (match_dup 2)))
+	      (set (match_dup 3)
+		   (mod:DI (reg:DI 0) (match_dup 2)))
+	      (use (match_dup 3))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  /* Avoid use of cltd in favor of a mov+shift.  */
+  if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
+    {
+      if (true_regnum (operands[1]))
+        emit_move_insn (operands[0], operands[1]);
+      else
+	emit_move_insn (operands[3], operands[1]);
+      operands[4] = operands[3];
+    }
+  else
+    {
+      gcc_assert (!true_regnum (operands[1]));
+      operands[4] = operands[1];
+    }
+})
+
+
+(define_expand "divmodsi4"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (div:SI (match_operand:SI 1 "register_operand" "")
+			   (match_operand:SI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:SI 3 "register_operand" "")
+		   (mod:SI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "")
+
+;; Allow the dividend to arrive in eax or edx to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of the code.
+(define_insn "*divmodsi4_nocltd"
+  [(set (match_operand:SI 0 "register_operand" "=&a,?a")
+	(div:SI (match_operand:SI 2 "register_operand" "1,0")
+		(match_operand:SI 3 "nonimmediate_operand" "rm,rm")))
+   (set (match_operand:SI 1 "register_operand" "=&d,&d")
+	(mod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi4_cltd"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(div:SI (match_operand:SI 2 "register_operand" "a")
+		(match_operand:SI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 1 "register_operand" "=&d")
+	(mod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "optimize_function_for_size_p (cfun) || TARGET_USE_CLTD"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi_noext"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(div:SI (match_operand:SI 1 "register_operand" "0")
+		(match_operand:SI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 3 "register_operand" "=d")
+	(mod:SI (match_dup 1) (match_dup 2)))
+   (use (match_operand:SI 4 "register_operand" "3"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "idiv{l}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(div:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "nonimmediate_operand" "")))
+   (set (match_operand:SI 3 "register_operand" "")
+	(mod:SI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 3)
+		   (ashiftrt:SI (match_dup 4) (const_int 31)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+	           (div:SI (reg:SI 0) (match_dup 2)))
+	      (set (match_dup 3)
+		   (mod:SI (reg:SI 0) (match_dup 2)))
+	      (use (match_dup 3))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  /* Avoid use of cltd in favor of a mov+shift.  */
+  if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
+    {
+      if (true_regnum (operands[1]))
+        emit_move_insn (operands[0], operands[1]);
+      else
+	emit_move_insn (operands[3], operands[1]);
+      operands[4] = operands[3];
+    }
+  else
+    {
+      gcc_assert (!true_regnum (operands[1]));
+      operands[4] = operands[1];
+    }
+})
+;; %%% Split me.
+(define_insn "divmodhi4"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(div:HI (match_operand:HI 1 "register_operand" "0")
+		(match_operand:HI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:HI 3 "register_operand" "=&d")
+	(mod:HI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_HIMODE_MATH"
+  "cwtd\;idiv{w}\t%2"
+  [(set_attr "type" "multi")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "SI")])
+
+(define_insn "udivmoddi4"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(udiv:DI (match_operand:DI 1 "register_operand" "0")
+		 (match_operand:DI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 3 "register_operand" "=&d")
+	(umod:DI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "xor{q}\t%3, %3\;div{q}\t%2"
+  [(set_attr "type" "multi")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "DI")])
+
+(define_insn "*udivmoddi4_noext"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(udiv:DI (match_operand:DI 1 "register_operand" "0")
+		 (match_operand:DI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 3 "register_operand" "=d")
+	(umod:DI (match_dup 1) (match_dup 2)))
+   (use (match_dup 3))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "div{q}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(udiv:DI (match_operand:DI 1 "register_operand" "")
+		 (match_operand:DI 2 "nonimmediate_operand" "")))
+   (set (match_operand:DI 3 "register_operand" "")
+	(umod:DI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(set (match_dup 3) (const_int 0))
+   (parallel [(set (match_dup 0)
+		   (udiv:DI (match_dup 1) (match_dup 2)))
+	      (set (match_dup 3)
+		   (umod:DI (match_dup 1) (match_dup 2)))
+	      (use (match_dup 3))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(udiv:SI (match_operand:SI 1 "register_operand" "0")
+		 (match_operand:SI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 3 "register_operand" "=&d")
+	(umod:SI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "xor{l}\t%3, %3\;div{l}\t%2"
+  [(set_attr "type" "multi")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "SI")])
+
+(define_insn "*udivmodsi4_noext"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(udiv:SI (match_operand:SI 1 "register_operand" "0")
+		 (match_operand:SI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 3 "register_operand" "=d")
+	(umod:SI (match_dup 1) (match_dup 2)))
+   (use (match_dup 3))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "div{l}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(udiv:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nonimmediate_operand" "")))
+   (set (match_operand:SI 3 "register_operand" "")
+	(umod:SI (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(set (match_dup 3) (const_int 0))
+   (parallel [(set (match_dup 0)
+		   (udiv:SI (match_dup 1) (match_dup 2)))
+	      (set (match_dup 3)
+		   (umod:SI (match_dup 1) (match_dup 2)))
+	      (use (match_dup 3))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_expand "udivmodhi4"
+  [(set (match_dup 4) (const_int 0))
+   (parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (udiv:HI (match_operand:HI 1 "register_operand" "")
+		 	    (match_operand:HI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:HI 3 "register_operand" "")
+	   	   (umod:HI (match_dup 1) (match_dup 2)))
+	      (use (match_dup 4))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_HIMODE_MATH"
+  "operands[4] = gen_reg_rtx (HImode);")
+
+(define_insn "*udivmodhi_noext"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(udiv:HI (match_operand:HI 1 "register_operand" "0")
+		 (match_operand:HI 2 "nonimmediate_operand" "rm")))
+   (set (match_operand:HI 3 "register_operand" "=d")
+	(umod:HI (match_dup 1) (match_dup 2)))
+   (use (match_operand:HI 4 "register_operand" "3"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "div{w}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "HI")])
+
+;; We cannot use div/idiv for the double-wide division, because it
+;; raises a divide-error fault ("division by zero") when the quotient
+;; overflows, and that is not what we expect from the truncation.
+;; Because a true (non-truncating) double-wide division is never
+;; generated, we cannot create this insn anyway.
+;
+;(define_insn ""
+;  [(set (match_operand:SI 0 "register_operand" "=a")
+;	(truncate:SI
+;	  (udiv:DI (match_operand:DI 1 "register_operand" "A")
+;		   (zero_extend:DI
+;		     (match_operand:SI 2 "nonimmediate_operand" "rm")))))
+;   (set (match_operand:SI 3 "register_operand" "=d")
+;	(truncate:SI
+;	  (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
+;   (clobber (reg:CC FLAGS_REG))]
+;  ""
+;  "div{l}\t{%2, %0|%0, %2}"
+;  [(set_attr "type" "idiv")])
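+
+;; A hypothetical example of that overflow: the quotient
+;; 0x100000000 / 1 == 2^32 does not fit in 32 bits, so a single divl
+;; would fault, while C truncation semantics require the well-defined
+;; result 0.  GCC therefore performs a full 64-bit division instead
+;; (e.g. via __udivdi3 on 32-bit targets):
+;;
+;;   unsigned narrow_udiv (unsigned long long a, unsigned b)
+;;   {
+;;     return (unsigned) (a / b);
+;;   }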
+
+;;- Logical AND instructions
+
+;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
+;; Note that this excludes ah.
+
+(define_insn "*testdi_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm")
+		  (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re"))
+	  (const_int 0)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   test{l}\t{%k1, %k0|%k0, %k1}
+   test{l}\t{%k1, %k0|%k0, %k1}
+   test{q}\t{%1, %0|%0, %1}
+   test{q}\t{%1, %0|%0, %1}
+   test{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "modrm" "0,1,0,1,1")
+   (set_attr "mode" "SI,SI,DI,DI,DI")
+   (set_attr "pent_pair" "uv,np,uv,np,uv")])
+
+(define_insn "testsi_1"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm")
+		  (match_operand:SI 1 "general_operand" "i,i,ri"))
+	  (const_int 0)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "test{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "modrm" "0,1,1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testsi_ccno_1"
+  [(set (reg:CCNO FLAGS_REG)
+	(compare:CCNO
+	  (and:SI (match_operand:SI 0 "nonimmediate_operand" "")
+		  (match_operand:SI 1 "nonmemory_operand" ""))
+	  (const_int 0)))]
+  ""
+  "")
+
+(define_insn "*testhi_1"
+  [(set (reg FLAGS_REG)
+        (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm")
+			 (match_operand:HI 1 "general_operand" "n,n,rn"))
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "test{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "modrm" "0,1,1")
+   (set_attr "mode" "HI")
+   (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ccz_1"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "")
+			     (match_operand:QI 1 "nonmemory_operand" ""))
+		 (const_int 0)))]
+  ""
+  "")
+
+(define_insn "*testqi_1_maybe_si"
+  [(set (reg FLAGS_REG)
+        (compare
+	  (and:QI
+	    (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
+	    (match_operand:QI 1 "general_operand" "n,n,qn,n"))
+	  (const_int 0)))]
+   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+    && ix86_match_ccmode (insn,
+ 			 CONST_INT_P (operands[1])
+ 			 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
+{
+  if (which_alternative == 3)
+    {
+      if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
+	operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+      return "test{l}\t{%1, %k0|%k0, %1}";
+    }
+  return "test{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "test")
+   (set_attr "modrm" "0,1,1,1")
+   (set_attr "mode" "QI,QI,QI,SI")
+   (set_attr "pent_pair" "uv,np,uv,np")])
+
+(define_insn "*testqi_1"
+  [(set (reg FLAGS_REG)
+        (compare
+	  (and:QI
+	    (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm")
+	    (match_operand:QI 1 "general_operand" "n,n,qn"))
+	  (const_int 0)))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && ix86_match_ccmode (insn, CCNOmode)"
+  "test{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "modrm" "0,1,1")
+   (set_attr "mode" "QI")
+   (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ext_ccno_0"
+  [(set (reg:CCNO FLAGS_REG)
+	(compare:CCNO
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "")
+	      (const_int 8)
+	      (const_int 8))
+	    (match_operand 1 "const_int_operand" ""))
+	  (const_int 0)))]
+  ""
+  "")
+
+(define_insn "*testqi_ext_0"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8))
+	    (match_operand 1 "const_int_operand" "n"))
+	  (const_int 0)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "test{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "mode" "QI")
+   (set_attr "length_immediate" "1")
+   (set_attr "pent_pair" "np")])
+
+(define_insn "*testqi_ext_1"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8))
+	    (zero_extend:SI
+	      (match_operand:QI 1 "general_operand" "Qm")))
+	  (const_int 0)))]
+  "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "test{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8))
+	    (zero_extend:SI
+	      (match_operand:QI 1 "register_operand" "Q")))
+	  (const_int 0)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  "test{b}\t{%1, %h0|%h0, %1}"
+  [(set_attr "type" "test")
+   (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 0 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8))
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "Q")
+	      (const_int 8)
+	      (const_int 8)))
+	  (const_int 0)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "test{b}\t{%h1, %h0|%h0, %h1}"
+  [(set_attr "type" "test")
+   (set_attr "mode" "QI")])
+
+;; Combine likes to form bit extractions for some tests.  Humor it.
+(define_insn "*testqi_ext_3"
+  [(set (reg FLAGS_REG)
+        (compare (zero_extract:SI
+		   (match_operand 0 "nonimmediate_operand" "rm")
+		   (match_operand:SI 1 "const_int_operand" "")
+		   (match_operand:SI 2 "const_int_operand" ""))
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && INTVAL (operands[1]) > 0
+   && INTVAL (operands[2]) >= 0
+   && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+   && (GET_MODE (operands[0]) == SImode
+       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)
+       || GET_MODE (operands[0]) == HImode
+       || GET_MODE (operands[0]) == QImode)"
+  "#")
+
+(define_insn "*testqi_ext_3_rex64"
+  [(set (reg FLAGS_REG)
+        (compare (zero_extract:DI
+		   (match_operand 0 "nonimmediate_operand" "rm")
+		   (match_operand:DI 1 "const_int_operand" "")
+		   (match_operand:DI 2 "const_int_operand" ""))
+		 (const_int 0)))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && INTVAL (operands[1]) > 0
+   && INTVAL (operands[2]) >= 0
+   /* Ensure that resulting mask is zero or sign extended operand.  */
+   && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+       || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64
+	   && INTVAL (operands[1]) > 32))
+   && (GET_MODE (operands[0]) == SImode
+       || GET_MODE (operands[0]) == DImode
+       || GET_MODE (operands[0]) == HImode
+       || GET_MODE (operands[0]) == QImode)"
+  "#")
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+        (match_operator 1 "compare_operator"
+	  [(zero_extract
+	     (match_operand 2 "nonimmediate_operand" "")
+	     (match_operand 3 "const_int_operand" "")
+	     (match_operand 4 "const_int_operand" ""))
+	   (const_int 0)]))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+{
+  rtx val = operands[2];
+  HOST_WIDE_INT len = INTVAL (operands[3]);
+  HOST_WIDE_INT pos = INTVAL (operands[4]);
+  HOST_WIDE_INT mask;
+  enum machine_mode mode, submode;
+
+  mode = GET_MODE (val);
+  if (MEM_P (val))
+    {
+      /* ??? Combine likes to put non-volatile mem extractions in QImode
+	 no matter the size of the test.  So find a mode that works.  */
+      if (! MEM_VOLATILE_P (val))
+	{
+	  mode = smallest_mode_for_size (pos + len, MODE_INT);
+	  val = adjust_address (val, mode, 0);
+	}
+    }
+  else if (GET_CODE (val) == SUBREG
+	   && (submode = GET_MODE (SUBREG_REG (val)),
+	       GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+	   && pos + len <= GET_MODE_BITSIZE (submode))
+    {
+      /* Narrow a paradoxical subreg to prevent partial register stalls.  */
+      mode = submode;
+      val = SUBREG_REG (val);
+    }
+  else if (mode == HImode && pos + len <= 8)
+    {
+      /* Small HImode tests can be converted to QImode.  */
+      mode = QImode;
+      val = gen_lowpart (QImode, val);
+    }
+
+  if (len == HOST_BITS_PER_WIDE_INT)
+    mask = -1;
+  else
+    mask = ((HOST_WIDE_INT)1 << len) - 1;
+  mask <<= pos;
+
+  operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
+})
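+
+;; A hypothetical C-level trigger for the split above: combine forms a
+;; zero_extract compare for the bit-field test, which is rewritten as
+;; an AND with the mask (((1 << len) - 1) << pos), here 0xf00:
+;;
+;;   struct s { unsigned lo : 8; unsigned mid : 4; };
+;;   int mid_set (struct s x) { return x.mid != 0; }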
+
+;; Convert HImode/SImode test instructions with an immediate operand to
+;; QImode ones.  The i386 cannot encode a test with an 8-bit
+;; sign-extended immediate, so this is a relatively important trick.
+;; Do the conversion only post-reload, to avoid restricting the
+;; register class to QI regs earlier.
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and (match_operand 2 "register_operand" "")
+	        (match_operand 3 "const_int_operand" ""))
+	   (const_int 0)]))]
+   "reload_completed
+    && QI_REG_P (operands[2])
+    && GET_MODE (operands[2]) != QImode
+    && ((ix86_match_ccmode (insn, CCZmode)
+    	 && !(INTVAL (operands[3]) & ~(255 << 8)))
+	|| (ix86_match_ccmode (insn, CCNOmode)
+	    && !(INTVAL (operands[3]) & ~(127 << 8))))"
+  [(set (match_dup 0)
+	(match_op_dup 1
+	  [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8))
+		   (match_dup 3))
+	   (const_int 0)]))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);
+   operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);")
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and (match_operand 2 "nonimmediate_operand" "")
+	        (match_operand 3 "const_int_operand" ""))
+	   (const_int 0)]))]
+   "reload_completed
+    && GET_MODE (operands[2]) != QImode
+    && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
+    && ((ix86_match_ccmode (insn, CCZmode)
+	 && !(INTVAL (operands[3]) & ~255))
+	|| (ix86_match_ccmode (insn, CCNOmode)
+	    && !(INTVAL (operands[3]) & ~127)))"
+  [(set (match_dup 0)
+	(match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+			 (const_int 0)]))]
+  "operands[2] = gen_lowpart (QImode, operands[2]);
+   operands[3] = gen_lowpart (QImode, operands[3]);")
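+
+;; A hypothetical size comparison for the preceding splits: test has no
+;; sign-extended imm8 form, so "testl $8, %eax" carries a 32-bit
+;; immediate, while the narrowed "testb $8, %al" encodes the mask in a
+;; single byte:
+;;
+;;   int bit3_set (int x) { return (x & 8) != 0; }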
+
+
+;; %%% This used to optimize known byte-wide AND operations to memory,
+;; and sometimes to QImode registers.  If this is considered useful,
+;; it should be done with splitters.
+
+(define_expand "anddi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(and:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		(match_operand:DI 2 "x86_64_szext_general_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
+
+(define_insn "*anddi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+	(and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
+		(match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      {
+	enum machine_mode mode;
+
+	gcc_assert (CONST_INT_P (operands[2]));
+        if (INTVAL (operands[2]) == 0xff)
+	  mode = QImode;
+	else
+	  {
+	    gcc_assert (INTVAL (operands[2]) == 0xffff);
+	    mode = HImode;
+	  }
+
+	operands[1] = gen_lowpart (mode, operands[1]);
+	if (mode == QImode)
+	  return "movz{bq|x}\t{%1,%0|%0, %1}";
+	else
+	  return "movz{wq|x}\t{%1,%0|%0, %1}";
+      }
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (get_attr_mode (insn) == MODE_SI)
+	return "and{l}\t{%k2, %k0|%k0, %k2}";
+      else
+	return "and{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "type" "alu,alu,alu,imovx")
+   (set_attr "length_immediate" "*,*,*,0")
+   (set_attr "mode" "SI,DI,DI,DI")])
+
+(define_insn "*anddi_2"
+  [(set (reg FLAGS_REG)
+	(compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
+			 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
+	(and:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, DImode, operands)"
+  "@
+   and{l}\t{%k2, %k0|%k0, %k2}
+   and{q}\t{%2, %0|%0, %2}
+   and{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI,DI,DI")])
+
+(define_expand "andsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(and:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (AND, SImode, operands); DONE;")
+
+(define_insn "*andsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
+	(and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
+		(match_operand:SI 2 "general_operand" "ri,rm,L")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (AND, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      {
+	enum machine_mode mode;
+
+	gcc_assert (CONST_INT_P (operands[2]));
+        if (INTVAL (operands[2]) == 0xff)
+	  mode = QImode;
+	else
+	  {
+	    gcc_assert (INTVAL (operands[2]) == 0xffff);
+	    mode = HImode;
+	  }
+
+	operands[1] = gen_lowpart (mode, operands[1]);
+	if (mode == QImode)
+	  return "movz{bl|x}\t{%1,%0|%0, %1}";
+	else
+	  return "movz{wl|x}\t{%1,%0|%0, %1}";
+      }
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "and{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "type" "alu,alu,imovx")
+   (set_attr "length_immediate" "*,*,0")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(and (match_dup 0)
+	     (const_int -65536)))
+   (clobber (reg:CC FLAGS_REG))]
+  "optimize_function_for_size_p (cfun) || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
+  [(set (strict_low_part (match_dup 1)) (const_int 0))]
+  "operands[1] = gen_lowpart (HImode, operands[0]);")
+
+(define_split
+  [(set (match_operand 0 "ext_register_operand" "")
+	(and (match_dup 0)
+	     (const_int -256)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+  [(set (strict_low_part (match_dup 1)) (const_int 0))]
+  "operands[1] = gen_lowpart (QImode, operands[0]);")
+
+(define_split
+  [(set (match_operand 0 "ext_register_operand" "")
+	(and (match_dup 0)
+	     (const_int -65281)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+  [(parallel [(set (zero_extract:SI (match_dup 0)
+				    (const_int 8)
+				    (const_int 8))
+		   (xor:SI
+		     (zero_extract:SI (match_dup 0)
+				      (const_int 8)
+				      (const_int 8))
+		     (zero_extract:SI (match_dup 0)
+				      (const_int 8)
+				      (const_int 8))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);")
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		  (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
+  "and{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*andsi_2"
+  [(set (reg FLAGS_REG)
+	(compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:SI 2 "general_operand" "g,ri"))
+		 (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+	(and:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, SImode, operands)"
+  "and{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand:SI 2 "general_operand" "g"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, SImode, operands)"
+  "and{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_expand "andhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(and:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		(match_operand:HI 2 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (AND, HImode, operands); DONE;")
+
+(define_insn "*andhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+	(and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
+		(match_operand:HI 2 "general_operand" "rn,rm,L")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (AND, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      gcc_assert (CONST_INT_P (operands[2]));
+      gcc_assert (INTVAL (operands[2]) == 0xff);
+      return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+
+    default:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+      return "and{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "type" "alu,alu,imovx")
+   (set_attr "length_immediate" "*,*,0")
+   (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*andhi_2"
+  [(set (reg FLAGS_REG)
+	(compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:HI 2 "general_operand" "rmn,rn"))
+		 (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+	(and:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, HImode, operands)"
+  "and{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_expand "andqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(and:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		(match_operand:QI 2 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (AND, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+(define_insn "*andqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		(match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (AND, QImode, operands)"
+  "@
+   and{b}\t{%2, %0|%0, %2}
+   and{b}\t{%2, %0|%0, %2}
+   and{l}\t{%k2, %k0|%k0, %k2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+	(and:QI (match_dup 0)
+		(match_operand:QI 1 "general_operand" "qn,qmn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "and{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_maybe_si"
+  [(set (reg FLAGS_REG)
+	(compare (and:QI
+		      (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		      (match_operand:QI 2 "general_operand" "qmn,qn,n"))
+		 (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+	(and:QI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (AND, QImode, operands)
+   && ix86_match_ccmode (insn,
+			 CONST_INT_P (operands[2])
+			 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
+{
+  if (which_alternative == 2)
+    {
+      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+        operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+      return "and{l}\t{%2, %k0|%k0, %2}";
+    }
+  return "and{b}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_2"
+  [(set (reg FLAGS_REG)
+	(compare (and:QI
+		   (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+		   (match_operand:QI 2 "general_operand" "qmn,qn"))
+		 (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+	(and:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, QImode, operands)"
+  "and{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_slp"
+  [(set (reg FLAGS_REG)
+	(compare (and:QI
+		   (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+		   (match_operand:QI 1 "nonimmediate_operand" "qmn,qn"))
+		 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+	(and:QI (match_dup 0) (match_dup 1)))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "and{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+;; ??? A bug in recog prevents it from recognizing a const_int as an
+;; operand to zero_extend in andqi_ext_1.  It was checking explicitly
+;; for a QImode operand, which of course failed.
+
+(define_insn "andqi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(and:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "QI")])
+
+;; Generated by a peephole translating test to and.  This shows up
+;; often in FP comparisons.
+
+(define_insn "*andqi_ext_0_cc"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (and:SI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "0")
+	      (const_int 8)
+	      (const_int 8))
+	    (match_operand 2 "const_int_operand" "n"))
+	  (const_int 0)))
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(and:SI
+	  (zero_extract:SI
+	    (match_dup 1)
+	    (const_int 8)
+	    (const_int 8))
+	  (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(and:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand:QI 2 "general_operand" "Qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(and:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand 2 "ext_register_operand" "Q"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(and:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "%0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extract:SI
+	    (match_operand 2 "ext_register_operand" "Q")
+	    (const_int 8)
+	    (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "and{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with an immediate operand to shorter
+;; QImode equivalents when possible.
+;; Don't do the splitting with memory operands, since it introduces a
+;; risk of memory-mismatch stalls.  We may want to do the splitting when
+;; optimizing for size, but that can (should?) be handled by generic
+;; code instead.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(and (match_operand 1 "register_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(~INTVAL (operands[2]) & ~(255 << 8))
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+		   (and:SI (zero_extract:SI (match_dup 1)
+					    (const_int 8) (const_int 8))
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since AND can be encoded with a sign-extended 8-bit immediate, this
+;; split is only profitable when the 7th bit of the mask is not set.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(and (match_operand 1 "general_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && ANY_QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(~INTVAL (operands[2]) & ~255)
+    && !(INTVAL (operands[2]) & 128)
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+		   (and:QI (match_dup 1)
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
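+
+;; A hypothetical illustration: the mask 0xffffff7f cannot use the
+;; sign-extended imm8 form (that would yield 0x0000007f), so the wide
+;; "andl" needs a full 32-bit immediate, while the narrowed
+;; "andb $0x7f, %al" is two bytes.  A mask like 0xffffff80 is already
+;; short as "andl $-128, %eax" and is left alone:
+;;
+;;   unsigned clear_bit7 (unsigned x) { return x & 0xffffff7f; }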
+
+;; Logical inclusive OR instructions
+
+;; %%% This used to optimize known byte-wide OR operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "iordi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(ior:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		(match_operand:DI 2 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (IOR, DImode, operands); DONE;")
+
+(define_insn "*iordi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:DI 2 "x86_64_general_operand" "re,rme")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (IOR, DImode, operands)"
+  "or{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*iordi_2_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+	(ior:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, DImode, operands)"
+  "or{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*iordi_3_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+			 (match_operand:DI 2 "x86_64_general_operand" "rem"))
+		 (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, DImode, operands)"
+  "or{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(ior:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (IOR, SImode, operands); DONE;")
+
+(define_insn "*iorsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:SI 2 "general_operand" "ri,g")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (IOR, SImode, operands)"
+  "or{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*iorsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		  (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)"
+  "or{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_1_zext_imm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+		(match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "or{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2"
+  [(set (reg FLAGS_REG)
+	(compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:SI 2 "general_operand" "g,ri"))
+		 (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+	(ior:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, SImode, operands)"
+  "or{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? The special case for an immediate operand is missing - it is tricky.
+(define_insn "*iorsi_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand:SI 2 "general_operand" "g"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, SImode, operands)"
+  "or{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2_zext_imm"
+  [(set (reg FLAGS_REG)
+	(compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, SImode, operands)"
+  "or{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_3"
+  [(set (reg FLAGS_REG)
+	(compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand:SI 2 "general_operand" "g"))
+		 (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "or{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_expand "iorhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(ior:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		(match_operand:HI 2 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (IOR, HImode, operands); DONE;")
+
+(define_insn "*iorhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+	(ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:HI 2 "general_operand" "rmn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (IOR, HImode, operands)"
+  "or{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_2"
+  [(set (reg FLAGS_REG)
+	(compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:HI 2 "general_operand" "rmn,rn"))
+		 (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+	(ior:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, HImode, operands)"
+  "or{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_3"
+  [(set (reg FLAGS_REG)
+	(compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+			 (match_operand:HI 2 "general_operand" "rmn"))
+		 (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "or{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_expand "iorqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(ior:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		(match_operand:QI 2 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (IOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+(define_insn "*iorqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+	(ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		(match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (IOR, QImode, operands)"
+  "@
+   or{b}\t{%2, %0|%0, %2}
+   or{b}\t{%2, %0|%0, %2}
+   or{l}\t{%k2, %k0|%k0, %k2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*iorqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+	(ior:QI (match_dup 0)
+		(match_operand:QI 1 "general_operand" "qmn,qn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "or{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2"
+  [(set (reg FLAGS_REG)
+	(compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:QI 2 "general_operand" "qmn,qn"))
+		 (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+	(ior:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (IOR, QImode, operands)"
+  "or{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2_slp"
+  [(set (reg FLAGS_REG)
+	(compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+			 (match_operand:QI 1 "general_operand" "qmn,qn"))
+		 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+	(ior:QI (match_dup 0) (match_dup 1)))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "or{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_3"
+  [(set (reg FLAGS_REG)
+	(compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+			 (match_operand:QI 2 "general_operand" "qmn"))
+		 (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "or{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(ior:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(ior:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand:QI 2 "general_operand" "Qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(ior:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand 2 "ext_register_operand" "Q"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "or{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(ior:SI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+	  		   (const_int 8)
+			   (const_int 8))
+	  (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+	  		   (const_int 8)
+			   (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "ior{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ior (match_operand 1 "register_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(INTVAL (operands[2]) & ~(255 << 8))
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+		   (ior:SI (zero_extract:SI (match_dup 1)
+					    (const_int 8) (const_int 8))
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with a sign-extended 8-bit immediate, this
+;; split is only profitable when the 7th bit of the mask is set.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ior (match_operand 1 "general_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && ANY_QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(INTVAL (operands[2]) & ~255)
+    && (INTVAL (operands[2]) & 128)
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+		   (ior:QI (match_dup 1)
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
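+
+;; The hypothetical dual of the AND case: sign-extending an imm8 of
+;; 0x80 would set all the upper bits, so "orl $128, %eax" needs a full
+;; 32-bit immediate, while "orb $0x80, %al" is two bytes:
+;;
+;;   unsigned set_bit7 (unsigned x) { return x | 0x80; }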
+
+;; Logical XOR instructions
+
+;; %%% This used to optimize known byte-wide XOR operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "xordi3"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(xor:DI (match_operand:DI 1 "nonimmediate_operand" "")
+		(match_operand:DI 2 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (XOR, DImode, operands); DONE;")
+
+(define_insn "*xordi_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*xordi_2_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+	(xor:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*xordi_3_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+			 (match_operand:DI 2 "x86_64_general_operand" "rem"))
+		 (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, DImode, operands)"
+  "xor{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(xor:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (XOR, SImode, operands); DONE;")
+
+(define_insn "*xorsi_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:SI 2 "general_operand" "ri,rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? Add a special case for immediates.
+(define_insn "*xorsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		  (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_1_zext_imm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+		(match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2"
+  [(set (reg FLAGS_REG)
+	(compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:SI 2 "general_operand" "g,ri"))
+		 (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+	(xor:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? The special case for an immediate operand is missing - it is tricky.
+(define_insn "*xorsi_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand:SI 2 "general_operand" "g"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2_zext_imm"
+  [(set (reg FLAGS_REG)
+	(compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, SImode, operands)"
+  "xor{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_3"
+  [(set (reg FLAGS_REG)
+	(compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+			 (match_operand:SI 2 "general_operand" "g"))
+		 (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "xor{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_expand "xorhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(xor:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		(match_operand:HI 2 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (XOR, HImode, operands); DONE;")
+
+(define_insn "*xorhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+	(xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:HI 2 "general_operand" "rmn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, HImode, operands)"
+  "xor{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_2"
+  [(set (reg FLAGS_REG)
+	(compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+			 (match_operand:HI 2 "general_operand" "rmn,rn"))
+		 (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+	(xor:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, HImode, operands)"
+  "xor{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_3"
+  [(set (reg FLAGS_REG)
+	(compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+			 (match_operand:HI 2 "general_operand" "rmn"))
+		 (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "xor{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "HI")])
+
+(define_expand "xorqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(xor:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		(match_operand:QI 2 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (XOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+(define_insn "*xorqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+	(xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		(match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, QImode, operands)"
+  "@
+   xor{b}\t{%2, %0|%0, %2}
+   xor{b}\t{%2, %0|%0, %2}
+   xor{l}\t{%k2, %k0|%k0, %k2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*xorqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+	(xor:QI (match_dup 0)
+		(match_operand:QI 1 "general_operand" "qn,qmn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "xor{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand:QI 2 "general_operand" "Qm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1_rex64"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI
+	    (match_operand 1 "ext_register_operand" "0")
+	    (const_int 8)
+	    (const_int 8))
+	  (zero_extend:SI
+	    (match_operand 2 "ext_register_operand" "Q"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+	  		   (const_int 8)
+			   (const_int 8))
+	  (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+	  		   (const_int 8)
+			   (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+  "xor{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_1"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+		  (match_operand:QI 2 "general_operand" "qmn,qn"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+	(xor:QI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (XOR, QImode, operands)"
+  "xor{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_2_slp"
+  [(set (reg FLAGS_REG)
+	(compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+			 (match_operand:QI 1 "general_operand" "qmn,qn"))
+		 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+	(xor:QI (match_dup 0) (match_dup 1)))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "xor{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_2"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+		  (match_operand:QI 2 "general_operand" "qmn"))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "xor{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (xor:SI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "0")
+	      (const_int 8)
+	      (const_int 8))
+	    (match_operand:QI 2 "general_operand" "qmn"))
+	  (const_int 0)))
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+	  (match_dup 2)))]
+  "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (xor:SI
+	    (zero_extract:SI
+	      (match_operand 1 "ext_register_operand" "0")
+	      (const_int 8)
+	      (const_int 8))
+	    (match_operand:QI 2 "nonmemory_operand" "Qn"))
+	  (const_int 0)))
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+			 (const_int 8)
+			 (const_int 8))
+	(xor:SI
+	  (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+	  (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  "xor{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+(define_expand "xorqi_cc_ext_1"
+  [(parallel [
+     (set (reg:CCNO FLAGS_REG)
+	  (compare:CCNO
+	    (xor:SI
+	      (zero_extract:SI
+		(match_operand 1 "ext_register_operand" "")
+		(const_int 8)
+		(const_int 8))
+	      (match_operand:QI 2 "general_operand" ""))
+	    (const_int 0)))
+     (set (zero_extract:SI (match_operand 0 "ext_register_operand" "")
+			   (const_int 8)
+			   (const_int 8))
+	  (xor:SI
+	    (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+	    (match_dup 2)))])]
+  ""
+  "")
+
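+;; When the constant has bits set only in positions 8..15, operate on
+;; the high byte register (%ah and friends) directly instead of the
+;; full word.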
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(xor (match_operand 1 "register_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(INTVAL (operands[2]) & ~(255 << 8))
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+		   (xor:SI (zero_extract:SI (match_dup 1)
+					    (const_int 8) (const_int 8))
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since XOR can be encoded with a sign-extended immediate, this is only
+;; profitable when the 7th bit is set.
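+;; For illustration (assumed encodings, not output from this file): with
+;; bit 7 set, the byte constant does not survive sign extension to word
+;; size, so the SImode form needs a full 32-bit immediate:
+;;
+;;	xor{b}	$0x80, %al	; imm8, 2-byte encoding
+;;	xor{l}	$0x80, %eax	; needs imm32, since sign-extending the
+;;				; imm8 would give 0xffffff80
+;;
+;; With bit 7 clear (say 0x40), the SImode form can use the sign-extended
+;; imm8 encoding and the QImode split saves nothing.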
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(xor (match_operand 1 "general_operand" "")
+	     (match_operand 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && ANY_QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(INTVAL (operands[2]) & ~255)
+    && (INTVAL (operands[2]) & 128)
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+		   (xor:QI (match_dup 1)
+			   (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (QImode, operands[0]);
+   operands[1] = gen_lowpart (QImode, operands[1]);
+   operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Negation instructions
+
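+;; A double-word negation splits into a three-insn sequence; in 32-bit
+;; terms the splitters below produce (a sketch, not literal output):
+;;
+;;	neg{l}	low		; low = -low, CF = (original low != 0)
+;;	adc{l}	$0, high	; fold the borrow into the high word
+;;	neg{l}	high		; high = -(high + borrow)
+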
+(define_expand "negti2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_unary_operator (NEG, TImode, operands); DONE;")
+
+(define_insn "*negti2_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro")
+	(neg:TI (match_operand:TI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_unary_operator_ok (NEG, TImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel
+    [(set (reg:CCZ FLAGS_REG)
+	  (compare:CCZ (neg:DI (match_dup 1)) (const_int 0)))
+     (set (match_dup 0) (neg:DI (match_dup 1)))])
+   (parallel
+    [(set (match_dup 2)
+	  (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+			    (match_dup 3))
+		   (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_dup 2)
+	  (neg:DI (match_dup 2)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (&operands[0], 2, &operands[0], &operands[2]);")
+
+(define_expand "negdi2"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+  ""
+  "ix86_expand_unary_operator (NEG, DImode, operands); DONE;")
+
+(define_insn "*negdi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=ro")
+	(neg:DI (match_operand:DI 1 "general_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT
+   && ix86_unary_operator_ok (NEG, DImode, operands)"
+  "#")
+
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(neg:DI (match_operand:DI 1 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed"
+  [(parallel
+    [(set (reg:CCZ FLAGS_REG)
+	  (compare:CCZ (neg:SI (match_dup 1)) (const_int 0)))
+     (set (match_dup 0) (neg:SI (match_dup 1)))])
+   (parallel
+    [(set (match_dup 2)
+	  (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+			    (match_dup 3))
+		   (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_dup 2)
+	  (neg:SI (match_dup 2)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "split_di (&operands[0], 2, &operands[0], &operands[2]);");
+
+(define_insn "*negdi2_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+  "neg{q}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "DI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
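+;;
+;; Minimal C sketch of the flag semantics assumed here:
+;;
+;;	int zf = (0 - x) == 0;	/* same as x == 0: usable, hence CCZmode */
+;;	int sf = (0 - x) < 0;	/* sign of -x, not of x: not usable */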
+
+(define_insn "*negdi2_cmpz_rex64"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+		     (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(neg:DI (match_dup 1)))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+  "neg{q}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "DI")])
+
+
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  ""
+  "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
+
+(define_insn "*negsi2_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; Combine is quite creative about this pattern.
+(define_insn "*negsi2_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
+					(const_int 32)))
+		     (const_int 32)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
+
+(define_insn "*negsi2_cmpz"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+		     (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(neg:SI (match_dup 1)))]
+  "ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+(define_insn "*negsi2_cmpz_zext"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (lshiftrt:DI
+		       (neg:DI (ashift:DI
+				 (match_operand:DI 1 "register_operand" "0")
+				 (const_int 32)))
+		       (const_int 32))
+		     (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+					(const_int 32)))
+		     (const_int 32)))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+(define_expand "neghi2"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_unary_operator (NEG, HImode, operands); DONE;")
+
+(define_insn "*neghi2_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (NEG, HImode, operands)"
+  "neg{w}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "HI")])
+
+(define_insn "*neghi2_cmpz"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+		     (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(neg:HI (match_dup 1)))]
+  "ix86_unary_operator_ok (NEG, HImode, operands)"
+  "neg{w}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "HI")])
+
+(define_expand "negqi2"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_unary_operator (NEG, QImode, operands); DONE;")
+
+(define_insn "*negqi2_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (NEG, QImode, operands)"
+  "neg{b}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "QI")])
+
+(define_insn "*negqi2_cmpz"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+		     (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(neg:QI (match_dup 1)))]
+  "ix86_unary_operator_ok (NEG, QImode, operands)"
+  "neg{b}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "QI")])
+
+;; The sign of FP values can also be changed using the integer unit.
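+;;
+;; A hedged C model of the bit twiddling the splitters below emit
+;; (function name illustrative, not a helper from this file):
+;;
+;;	#include <stdint.h>
+;;	#include <string.h>
+;;
+;;	static float
+;;	negsf (float x)
+;;	{
+;;	  uint32_t bits;
+;;	  memcpy (&bits, &x, sizeof bits);
+;;	  bits ^= 0x80000000u;		/* neg flips the sign bit */
+;;	  memcpy (&x, &bits, sizeof bits);
+;;	  return x;
+;;	}
+;;
+;; abs instead clears the sign bit with bits &= 0x7fffffffu.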
+
+(define_expand "<code><mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*absneg<mode>2_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+	(match_operator:MODEF 3 "absneg_operator"
+	  [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
+  "#")
+
+(define_insn "*absneg<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+	(match_operator:MODEF 3 "absneg_operator"
+	  [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+   (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#")
+
+(define_insn "*absneg<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+	(match_operator:X87MODEF 3 "absneg_operator"
+	  [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "#")
+
+(define_expand "<code>tf2"
+  [(set (match_operand:TF 0 "register_operand" "")
+	(absneg:TF (match_operand:TF 1 "register_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+(define_insn "*absnegtf2_sse"
+  [(set (match_operand:TF 0 "register_operand" "=x,x")
+	(match_operator:TF 3 "absneg_operator"
+	  [(match_operand:TF 1 "register_operand" "0,x")]))
+   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE2"
+  "#")
+
+;; Splitters for fp abs and neg.
+
+(define_split
+  [(set (match_operand 0 "fp_register_operand" "")
+	(match_operator 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(match_operator 3 "absneg_operator"
+	  [(match_operand 1 "register_operand" "")]))
+   (use (match_operand 2 "nonimmediate_operand" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 0) (match_dup 3))]
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  enum machine_mode vmode = GET_MODE (operands[2]);
+  rtx tmp;
+
+  operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
+  operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
+  if (operands_match_p (operands[0], operands[2]))
+    {
+      tmp = operands[1];
+      operands[1] = operands[2];
+      operands[2] = tmp;
+    }
+  if (GET_CODE (operands[3]) == ABS)
+    tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+  else
+    tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+  operands[3] = tmp;
+})
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+	(match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand:V4SF 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  if (GET_CODE (operands[1]) == ABS)
+    {
+      tmp = gen_int_mode (0x7fffffff, SImode);
+      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+    }
+  else
+    {
+      tmp = gen_int_mode (0x80000000, SImode);
+      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+    }
+  operands[1] = tmp;
+})
+
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+	(match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  if (TARGET_64BIT)
+    {
+      tmp = gen_lowpart (DImode, operands[0]);
+      tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
+      operands[0] = tmp;
+
+      if (GET_CODE (operands[1]) == ABS)
+	tmp = const0_rtx;
+      else
+	tmp = gen_rtx_NOT (DImode, tmp);
+    }
+  else
+    {
+      operands[0] = gen_highpart (SImode, operands[0]);
+      if (GET_CODE (operands[1]) == ABS)
+	{
+	  tmp = gen_int_mode (0x7fffffff, SImode);
+	  tmp = gen_rtx_AND (SImode, operands[0], tmp);
+	}
+      else
+	{
+	  tmp = gen_int_mode (0x80000000, SImode);
+	  tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+	}
+    }
+  operands[1] = tmp;
+})
+
+(define_split
+  [(set (match_operand:XF 0 "register_operand" "")
+	(match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  operands[0] = gen_rtx_REG (SImode,
+			     true_regnum (operands[0])
+			     + (TARGET_64BIT ? 1 : 2));
+  if (GET_CODE (operands[1]) == ABS)
+    {
+      tmp = GEN_INT (0x7fff);
+      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+    }
+  else
+    {
+      tmp = GEN_INT (0x8000);
+      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+    }
+  operands[1] = tmp;
+})
+
+;; Conditionalize these after reload.  If they match before reload, we
+;; lose the clobber and the ability to use integer instructions.
+
+(define_insn "*<code><mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
+  "TARGET_80387
+   && (reload_completed
+       || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+  "f<absnegprefix>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code>extendsfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(absneg:DF (float_extend:DF
+		     (match_operand:SF 1 "register_operand" "0"))))]
+  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+  "f<absnegprefix>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "DF")])
+
+(define_insn "*<code>extendsfxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(absneg:XF (float_extend:XF
+		     (match_operand:SF 1 "register_operand" "0"))))]
+  "TARGET_80387"
+  "f<absnegprefix>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "XF")])
+
+(define_insn "*<code>extenddfxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(absneg:XF (float_extend:XF
+		      (match_operand:DF 1 "register_operand" "0"))))]
+  "TARGET_80387"
+  "f<absnegprefix>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "XF")])
+
+;; Copysign instructions
+
+(define_mode_iterator CSGNMODE [SF DF TF])
+(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
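+;; A hedged C model of the bit-select these patterns implement
+;; (illustrative name, not a helper from this file):
+;;
+;;	#include <stdint.h>
+;;	#include <string.h>
+;;
+;;	static float
+;;	copysign_bits (float mag, float sgn)
+;;	{
+;;	  uint32_t m, s;
+;;	  memcpy (&m, &mag, sizeof m);
+;;	  memcpy (&s, &sgn, sizeof s);
+;;	  m = (m & 0x7fffffffu) | (s & 0x80000000u);
+;;	  memcpy (&mag, &m, sizeof m);
+;;	  return mag;
+;;	}
+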
+(define_expand "copysign<mode>3"
+  [(match_operand:CSGNMODE 0 "register_operand" "")
+   (match_operand:CSGNMODE 1 "nonmemory_operand" "")
+   (match_operand:CSGNMODE 2 "register_operand" "")]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+{
+  ix86_expand_copysign (operands);
+  DONE;
+})
+
+(define_insn_and_split "copysign<mode>3_const"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+	(unspec:CSGNMODE
+	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
+	   (match_operand:CSGNMODE 2 "register_operand" "0")
+	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+	  UNSPEC_COPYSIGN))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_const (operands);
+  DONE;
+})
+
+(define_insn "copysign<mode>3_var"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+	(unspec:CSGNMODE
+	  [(match_operand:CSGNMODE 2 "register_operand"	"x,0,0,x,x")
+	   (match_operand:CSGNMODE 3 "register_operand"	"1,1,x,1,x")
+	   (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
+	   (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+	  UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+  "#")
+
+(define_split
+  [(set (match_operand:CSGNMODE 0 "register_operand" "")
+	(unspec:CSGNMODE
+	  [(match_operand:CSGNMODE 2 "register_operand" "")
+	   (match_operand:CSGNMODE 3 "register_operand" "")
+	   (match_operand:<CSGNVMODE> 4 "" "")
+	   (match_operand:<CSGNVMODE> 5 "" "")]
+	  UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1 ""))]
+  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    || (TARGET_SSE2 && (<MODE>mode == TFmode)))
+   && reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_var (operands);
+  DONE;
+})
+
+;; One's complement instructions
+
+(define_expand "one_cmpldi2"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(not:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_unary_operator (NOT, DImode, operands); DONE;")
+
+(define_insn "*one_cmpldi2_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands)"
+  "not{q}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "DI")])
+
+(define_insn "*one_cmpldi2_2_rex64"
+  [(set (reg FLAGS_REG)
+	(compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(not:DI (match_dup 1)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, DImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(not:DI (match_operand:DI 3 "nonimmediate_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand:DI 1 "nonimmediate_operand" "")
+	(not:DI (match_dup 3)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2
+		     [(xor:DI (match_dup 3) (const_int -1))
+		      (const_int 0)]))
+	      (set (match_dup 1)
+		   (xor:DI (match_dup 3) (const_int -1)))])]
+  "")
+
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  ""
+  "ix86_expand_unary_operator (NOT, SImode, operands); DONE;")
+
+(define_insn "*one_cmplsi2_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))]
+  "ix86_unary_operator_ok (NOT, SImode, operands)"
+  "not{l}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+  "not{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+(define_insn "*one_cmplsi2_2"
+  [(set (reg FLAGS_REG)
+	(compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+		 (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(not:SI (match_dup 1)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, SImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(not:SI (match_operand:SI 3 "nonimmediate_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand:SI 1 "nonimmediate_operand" "")
+	(not:SI (match_dup 3)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+				    (const_int 0)]))
+	      (set (match_dup 1)
+		   (xor:SI (match_dup 3) (const_int -1)))])]
+  "")
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_2_zext"
+  [(set (reg FLAGS_REG)
+	(compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+		 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (not:SI (match_dup 1))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, SImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(not:SI (match_operand:SI 3 "register_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand:DI 1 "register_operand" "")
+	(zero_extend:DI (not:SI (match_dup 3))))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+				    (const_int 0)]))
+	      (set (match_dup 1)
+		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]
+  "")
+
+(define_expand "one_cmplhi2"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_unary_operator (NOT, HImode, operands); DONE;")
+
+(define_insn "*one_cmplhi2_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))]
+  "ix86_unary_operator_ok (NOT, HImode, operands)"
+  "not{w}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "HI")])
+
+(define_insn "*one_cmplhi2_2"
+  [(set (reg FLAGS_REG)
+	(compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+		 (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(not:HI (match_dup 1)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NEG, HImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "HI")])
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(not:HI (match_operand:HI 3 "nonimmediate_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand:HI 1 "nonimmediate_operand" "")
+	(not:HI (match_dup 3)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1))
+		      		    (const_int 0)]))
+	      (set (match_dup 1)
+		   (xor:HI (match_dup 3) (const_int -1)))])]
+  "")
+
+;; %%% Potential partial reg stall on alternative 1.  What to do?
+(define_expand "one_cmplqi2"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_unary_operator (NOT, QImode, operands); DONE;")
+
+(define_insn "*one_cmplqi2_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+	(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+  "ix86_unary_operator_ok (NOT, QImode, operands)"
+  "@
+   not{b}\t%0
+   not{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "QI,SI")])
+
+(define_insn "*one_cmplqi2_2"
+  [(set (reg FLAGS_REG)
+	(compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+		 (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(not:QI (match_dup 1)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, QImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(not:QI (match_operand:QI 3 "nonimmediate_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand:QI 1 "nonimmediate_operand" "")
+	(not:QI (match_dup 3)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1))
+		      		    (const_int 0)]))
+	      (set (match_dup 1)
+		   (xor:QI (match_dup 3) (const_int -1)))])]
+  "")
+
+;; Arithmetic shift instructions
+
+;; DImode shifts are implemented using the i386 "shift double" opcode,
+;; which is written as "sh[lr]d[lw] imm,reg,reg/mem".  If the shift count
+;; is variable, then the count is in %cl and the "imm" operand is dropped
+;; from the assembler input.
+;;
+;; This instruction shifts the target reg/mem as usual, but instead of
+;; shifting in zeros, bits are shifted in from the reg operand.  If the insn
+;; is a left shift double, bits are taken from the high order bits of
+;; reg, else if the insn is a shift right double, bits are taken from the
+;; low order bits of reg.  So if %eax is "1234" and %edx is "5678",
+;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
+;;
+;; Since sh[lr]d does not change the `reg' operand, that is done
+;; separately, making all shifts emit pairs of shift double and normal
+;; shift.  Since sh[lr]d does not shift more than 31 bits, and we wish to
+;; support a 63 bit shift, each shift where the count is in a reg expands
+;; to a pair of shifts, a branch, a shift by 32 and a label.
+;;
+;; If the shift count is a constant, we need never emit more than one
+;; shift pair, instead using moves and sign extension for counts greater
+;; than 31.
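+;;
+;; C sketch of the decomposition on 32-bit halves, matching the strategy
+;; above (illustrative code, not taken from this file):
+;;
+;;	#include <stdint.h>
+;;
+;;	static void
+;;	ashl64 (uint32_t *hi, uint32_t *lo, unsigned int count)
+;;	{
+;;	  if (count & 32)			/* the "shift by 32" leg */
+;;	    {
+;;	      *hi = *lo << (count & 31);
+;;	      *lo = 0;
+;;	    }
+;;	  else if (count & 31)
+;;	    {
+;;	      /* shld: high word takes bits from the top of the low word;
+;;		 a plain shift then finishes the low word.  */
+;;	      *hi = (*hi << count) | (*lo >> (32 - count));
+;;	      *lo <<= count;
+;;	    }
+;;	}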
+
+(define_expand "ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;")
+
+;; This pattern must be defined before *ashlti3_1 to prevent the
+;; combine pass from converting sse2_ashlti3 to *ashlti3_1.
+
+(define_insn "*avx_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(ashift:TI (match_operand:TI 1 "register_operand" "x")
+		   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(ashift:TI (match_operand:TI 1 "register_operand" "0")
+		   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "pslldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "*ashlti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=&r,r")
+	(ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0")
+		   (match_operand:QI 2 "nonmemory_operand" "Oc,Oc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+		   (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+			      (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_64BIT"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, operands[3], TImode); DONE;")
+
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+	(ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shld"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
+		(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+		  (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])
+
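+;; When the variable count may exceed 63, the halves produced by the
+;; shld pair must be corrected.  x86_64_shift_adj_1 tests bit 6 of the
+;; count and swaps in the overshifted value branchlessly via cmov;
+;; x86_64_shift_adj_2 instead branches around a move of the low half
+;; into the high half followed by clearing the low half.
+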
+(define_expand "x86_64_shift_adj_1"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+			     (const_int 64))
+		     (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "")
+        (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			 (match_operand:DI 1 "register_operand" "")
+			 (match_dup 0)))
+   (set (match_dup 1)
+	(if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			 (match_operand:DI 3 "register_operand" "r")
+			 (match_dup 1)))]
+  "TARGET_64BIT"
+  "")
+
+(define_expand "x86_64_shift_adj_2"
+  [(use (match_operand:DI 0 "register_operand" ""))
+   (use (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  "TARGET_64BIT"
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  ix86_expand_clear (operands[1]);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_expand "ashldi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+	(ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
+
+(define_insn "*ashldi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l")
+		   (match_operand:QI 2 "nonmemory_operand" "cJ,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "add{q}\t%0, %0";
+
+    case TYPE_LEA:
+      gcc_assert (CONST_INT_P (operands[2]));
+      gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3);
+      operands[1] = gen_rtx_MULT (DImode, operands[1],
+				  GEN_INT (1 << INTVAL (operands[2])));
+      return "lea{q}\t{%a1, %0|%0, %a1}";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{q}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{q}\t%0";
+      else
+	return "sal{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+            (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "DI")])
+
+;; Convert the shift to the lea pattern to avoid a flags dependency.
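+;; For example (assumed encodings, not output copied from this file):
+;;
+;;	sal{q}	$3, %rax		; writes EFLAGS
+;;	lea{q}	0(,%rax,8), %rdx	; same value, EFLAGS untouched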
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(ashift:DI (match_operand:DI 1 "index_register_operand" "")
+		   (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+	(mult:DI (match_dup 1)
+		 (match_dup 2)))]
+  "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);")
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
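+;;
+;; Pseudo-C sketch of the hazard (flags_of is a stand-in, not a real
+;; helper):
+;;
+;;	if ((count & 63) != 0)
+;;	  EFLAGS = flags_of (x << (count & 63));	/* updated */
+;;	/* else EFLAGS silently keeps its previous, unrelated value,
+;;	   so a compare pattern here would describe stale flags.  */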
+(define_insn "*ashldi3_cmp_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(ashift:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{q}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{q}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{q}\t%0";
+      else
+	return "sal{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{q}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{q}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{q}\t%0";
+      else
+	return "sal{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=&r,r")
+	(ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
+		   (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+			      (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "!TARGET_64BIT && TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, operands[3], DImode); DONE;")
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		     ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shld"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+		(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+		  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])
+
+(define_expand "x86_shift_adj_1"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+			     (const_int 32))
+		     (const_int 0)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			 (match_operand:SI 1 "register_operand" "")
+			 (match_dup 0)))
+   (set (match_dup 1)
+	(if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+			 (match_operand:SI 3 "register_operand" "r")
+			 (match_dup 1)))]
+  "TARGET_CMOVE"
+  "")
+
+(define_expand "x86_shift_adj_2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:SI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  ix86_expand_clear (operands[1]);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_expand "ashlsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;")
+
+(define_insn "*ashlsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "add{l}\t%0, %0";
+
+    case TYPE_LEA:
+      return "#";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{l}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%0";
+      else
+	return "sal{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+            (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+;; Convert the shift to the lea pattern to avoid a flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ashift (match_operand 1 "index_register_operand" "")
+                (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])
+   && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4"
+  [(const_int 0)]
+{
+  rtx pat;
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  if (GET_MODE_SIZE (mode) < 4)
+    operands[0] = gen_lowpart (SImode, operands[0]);
+  if (mode != Pmode)
+    operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+  pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+  if (Pmode != SImode)
+    pat = gen_rtx_SUBREG (SImode, pat, 0);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; The rare case of shifting RSP is handled by generating a move and a shift.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(ashift (match_operand 1 "register_operand" "")
+                (match_operand:QI 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  rtx pat, clob;
+  emit_move_insn (operands[0], operands[1]);
+  pat = gen_rtx_SET (VOIDmode, operands[0],
+		     gen_rtx_ASHIFT (GET_MODE (operands[0]),
+				     operands[0], operands[2]));
+  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob)));
+  DONE;
+})
+
+(define_insn "*ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
+			(match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%k0, %k0";
+
+    case TYPE_LEA:
+      return "#";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{l}\t{%b2, %k0|%k0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%k0";
+      else
+	return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+            (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		     (const_int 0))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+;; Convert the shift to the lea pattern to avoid a flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI (ashift (match_operand 1 "register_operand" "")
+				(match_operand:QI 2 "const_int_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0) (zero_extend:DI
+			(subreg:SI (mult:SI (match_dup 1)
+					    (match_dup 2)) 0)))]
+{
+  operands[1] = gen_lowpart (Pmode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlsi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(ashift:SI (match_dup 1) (match_dup 2)))]
+   "(optimize_function_for_size_p (cfun)
+     || !TARGET_PARTIAL_FLAG_REG_STALL
+     || (operands[2] == const1_rtx
+	 && (TARGET_SHIFT1
+	     || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{l}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%0";
+      else
+	return "sal{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{l}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%0";
+      else
+	return "sal{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cmp_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:SI (match_operand:SI 1 "register_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%k0, %k0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{l}\t{%b2, %k0|%k0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{l}\t%k0";
+      else
+	return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		     (const_int 0))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+(define_expand "ashlhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;")
+
+(define_insn "*ashlhi3_1_lea"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{w}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{w}\t%0";
+      else
+	return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+            (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "HI,SI")])
+
+(define_insn "*ashlhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{w}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{w}\t%0";
+      else
+	return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlhi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashift:HI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{w}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{w}\t%0";
+      else
+	return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{w}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{w}\t%0";
+      else
+	return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "HI")])
+
+(define_expand "ashlqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2.  What to do?
+
+(define_insn "*ashlqi3_1_lea"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+        return "add{l}\t%k0, %k0";
+      else
+        return "add{b}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t{%b2, %k0|%k0, %b2}";
+	  else
+	    return "sal{b}\t{%b2, %0|%0, %b2}";
+	}
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t%0";
+	  else
+	    return "sal{b}\t%0";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t{%2, %k0|%k0, %2}";
+	  else
+	    return "sal{b}\t{%2, %0|%0, %2}";
+	}
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "lea")
+            (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "QI,SI,SI")])
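+
+;; Note: the SImode alternatives above widen the QImode shift to a full
+;; 32-bit operation on %k0.  This lets a value allocated to a register
+;; without a byte subregister (e.g. %esi or %edi in 32-bit mode) still
+;; be shifted; the high bits are dead anyway.  Illustrative only,
+;; assuming the value lives in %esi:
+;;
+;;	sall	$2, %esi	; (char)x << 2 computed in SImode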
+
+(define_insn "*ashlqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_PARTIAL_REG_STALL
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+        return "add{l}\t%k0, %k0";
+      else
+        return "add{b}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t{%b2, %k0|%k0, %b2}";
+	  else
+	    return "sal{b}\t{%b2, %0|%0, %b2}";
+	}
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t%0";
+	  else
+	    return "sal{b}\t%0";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_SI)
+	    return "sal{l}\t{%2, %k0|%k0, %2}";
+	  else
+	    return "sal{b}\t{%2, %0|%0, %2}";
+	}
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "QI,SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlqi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(ashift:QI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{b}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{b}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{b}\t%0";
+      else
+	return "sal{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*ashlqi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+	&& (TARGET_SHIFT1
+	    || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{b}\t%0, %0";
+
+    default:
+      if (REG_P (operands[2]))
+	return "sal{b}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+	       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "sal{b}\t%0";
+      else
+	return "sal{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+		          (const_int 0))
+		      (match_operand 0 "register_operand" ""))
+		 (match_operand 2 "const1_operand" ""))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set_attr "mode" "QI")])
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "ashrti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;")
+
+(define_insn "*ashrti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+	(ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+		     (match_operand:QI 2 "nonmemory_operand" "Oc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+		   (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+			        (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_64BIT"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, operands[3], TImode); DONE;")
+
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+	(ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shrd"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashiftrt:DI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
+		(ashift:DI (match_operand:DI 1 "register_operand" "r")
+		  (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])
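+
+;; For reference: the hardware instruction "shrd $n, %rsi, %rdi" shifts
+;; %rdi right by n and fills the vacated high bits from the low bits of
+;; %rsi, i.e. %rdi = (%rdi >> n) | (%rsi << (64 - n)) with a logical
+;; right shift.  Register names here are illustrative only.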
+
+(define_expand "ashrdi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+	(ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
+
+(define_expand "x86_64_shift_adj_3"
+  [(use (match_operand:DI 0 "register_operand" ""))
+   (use (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63)));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
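+
+;; The expander above patches up a double-word arithmetic right shift
+;; whose count may be >= 64: the shrd/sar pair only sees the count
+;; modulo 64, so when bit 6 of the count is set the halves must be
+;; fixed up afterwards.  Roughly (register names illustrative):
+;;
+;;	testb	$64, %cl
+;;	je	1f
+;;	movq	%rdx, %rax	; low half takes the shifted high half
+;;	sarq	$63, %rdx	; high half becomes the replicated sign
+;; 1: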
+
+(define_insn "ashrdi3_63_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+		     (match_operand:DI 2 "const_int_operand" "i,i")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 63
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "@
+   {cqto|cqo}
+   sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "DI")])
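+
+;; Shifting right by 63 replicates the sign bit across the register,
+;; which is exactly what cqto computes into %rdx from %rax (cltd/cdq is
+;; the SImode analogue used by *ashrsi3_31 below), e.g.:
+;;
+;;	cqto			; %rdx = (%rax < 0) ? -1 : 0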
+
+(define_insn "*ashrdi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrdi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "@
+   sar{q}\t{%2, %0|%0, %2}
+   sar{q}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_one_bit_cmp_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrdi3_one_bit_cconly_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_cmp_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+			        (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "!TARGET_64BIT && TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, operands[3], DImode); DONE;")
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		     ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shrd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashiftrt:SI (match_dup 0)
+		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+		(ashift:SI (match_operand:SI 1 "register_operand" "r")
+		  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "pent_pair" "np")
+   (set_attr "mode" "SI")])
+
+(define_expand "x86_shift_adj_3"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:SI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31)));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_expand "ashrsi3_31"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+	           (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+		                (match_operand:SI 2 "const_int_operand" "i,i")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_insn "*ashrsi3_31"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+		     (match_operand:SI 2 "const_int_operand" "i,i")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_31_zext"
+  [(set (match_operand:DI 0 "register_operand" "=*d,r")
+	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+				     (match_operand:SI 2 "const_int_operand" "i,i"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && INTVAL (operands[2]) == 31
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "SI")])
+
+(define_expand "ashrsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*ashrsi3_1_one_bit"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
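+;; The one-bit forms exist because the implicit-count shift encoding is
+;; one byte shorter than the immediate form; hence the 2-byte "length"
+;; attribute above and the TARGET_SHIFT1 / optimize-for-size gate.  For
+;; example (AT&T syntax, %eax illustrative):
+;;
+;;	sarl	%eax		; D1 F8    (2 bytes)
+;;	sarl	$1, %eax	; C1 F8 01 (3 bytes)
+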
+(define_insn "*ashrsi3_1_one_bit_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+				     (match_operand:QI 2 "const1_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+	(ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   sar{l}\t{%2, %0|%0, %2}
+   sar{l}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0")
+				     (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   sar{l}\t{%2, %k0|%k0, %2}
+   sar{l}\t{%b2, %k0|%k0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrsi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_one_bit_cmp_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cmp_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_expand "ashrhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*ashrhi3_1_one_bit"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+	(ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "@
+   sar{w}\t{%2, %0|%0, %2}
+   sar{w}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:HI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrhi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(ashiftrt:HI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+(define_insn "*ashrhi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+(define_expand "ashrqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*ashrqi3_1_one_bit"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrqi3_1_one_bit_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(ashiftrt:QI (match_dup 0)
+		     (match_operand:QI 1 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t%0"
+  [(set_attr "type" "ishift1")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
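+;; The _slp (strict_low_part) variants match when only the low byte of
+;; a wider register is written and its upper bits must stay live, e.g.
+;; a signed char shifted in place while it sits in %al and the rest of
+;; %eax is still in use (register choice illustrative):
+;;
+;;	sarb	%al		; writes %al only; %eax[31:8] unchanged
+;;
+;; These are avoided when TARGET_PARTIAL_REG_STALL makes such partial
+;; writes expensive, unless we optimize for size.
+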
+(define_insn "*ashrqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+	(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "@
+   sar{b}\t{%2, %0|%0, %2}
+   sar{b}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+	(ashiftrt:QI (match_dup 0)
+		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   sar{b}\t{%1, %0|%0, %1}
+   sar{b}\t{%b1, %0|%0, %b1}"
+  [(set_attr "type" "ishift1")
+   (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(ashiftrt:QI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*ashrqi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(ashiftrt:QI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+
+;; Logical shift instructions
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;")
+
+;; This pattern must be defined before *lshrti3_1 to prevent the
+;; combine pass from converting sse2_lshrti3 to *lshrti3_1.
+
+(define_insn "*avx_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+ 	(lshiftrt:TI (match_operand:TI 1 "register_operand" "x")
+		     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+ 	(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+		     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "psrldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
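+
+;; psrldq (and vpsrldq above) shift the whole XMM register right by a
+;; *byte* count, which is why only bit counts that are multiples of 8
+;; match here and the operand is divided by 8 before printing.  For
+;; example, a TImode logical shift right by 64 bits becomes:
+;;
+;;	psrldq	$8, %xmm0	; %xmm0 >>= 64, zero-filling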
+
+(define_insn "*lshrti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+	(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+		     (match_operand:QI 2 "nonmemory_operand" "Oc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+		   (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+			        (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_64BIT"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, operands[3], TImode); DONE;")
+
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+	(lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
+
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+	(lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+
+(define_insn "*lshrdi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrdi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+	(lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "@
+   shr{q}\t{%2, %0|%0, %2}
+   shr{q}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_one_bit_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrdi3_cconly_one_bit_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_63_operand" "J"))
+	  (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium.  But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+			        (match_operand:QI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "!TARGET_64BIT && TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, operands[3], DImode); DONE;")
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		     ? epilogue_completed : reload_completed)"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
+
+(define_expand "lshrsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*lshrsi3_1_one_bit"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrsi3_1_one_bit_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+	(lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "@
+   shr{l}\t{%2, %0|%0, %2}
+   shr{l}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		       (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "@
+   shr{l}\t{%2, %k0|%k0, %2}
+   shr{l}\t{%b2, %k0|%k0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrsi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_cmp_one_bit_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cmp_zext"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+(define_expand "lshrhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*lshrhi3_1_one_bit"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+	(lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "@
+   shr{w}\t{%2, %0|%0, %2}
+   shr{w}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:HI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrhi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(lshiftrt:HI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+(define_insn "*lshrhi3_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+(define_expand "lshrqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*lshrqi3_1_one_bit"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrqi3_1_one_bit_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(lshiftrt:QI (match_dup 0)
+		     (match_operand:QI 1 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+  "shr{b}\t%0"
+  [(set_attr "type" "ishift1")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+	(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "@
+   shr{b}\t{%2, %0|%0, %2}
+   shr{b}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+	(lshiftrt:QI (match_dup 0)
+		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   shr{b}\t{%1, %0|%0, %1}
+   shr{b}\t{%b1, %0|%0, %b1}"
+  [(set_attr "type" "ishift1")
+   (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_one_bit_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(lshiftrt:QI (match_dup 1) (match_dup 2)))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*lshrqi2_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(lshiftrt:QI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi2_cconly"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		       (match_operand:QI 2 "const_1_to_31_operand" "I"))
+	  (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
+;; Rotate instructions
+
+(define_expand "rotldi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+	(rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+{
+  if (TARGET_64BIT)
+    {
+      ix86_expand_binary_operator (ROTATE, DImode, operands);
+      DONE;
+    }
+  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    FAIL;
+  emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
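+;; For illustration (hedged): a rotate left of %edx:%eax by a constant N
+;; (1 <= N <= 31) is emitted, roughly, as
+;;   movl  %eax, %ecx          ; save the original low word
+;;   shldl $N, %edx, %eax      ; lo = (lo << N) | (hi >> (32-N))
+;;   shldl $N, %ecx, %edx      ; hi = (hi << N) | (saved lo >> (32-N))
+;; with the register choice up to the allocator.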
+(define_insn_and_split "ix86_rotldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (rotate:DI (match_operand:DI 1 "register_operand" "0")
+                  (match_operand:QI 2 "const_1_to_31_operand" "I")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+         (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
+                 (lshiftrt:SI (match_dup 5)
+                              (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+         (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
+                 (lshiftrt:SI (match_dup 3)
+                              (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
+
+(define_insn "*rotlsi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATE, DImode, operands)"
+  "rol{q}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotldi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+	(rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "J,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)"
+  "@
+   rol{q}\t{%2, %0|%0, %2}
+   rol{q}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "DI")])
+
+(define_expand "rotlsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
+
+(define_insn "*rotlsi3_1_one_bit"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+  "rol{l}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotlsi3_1_one_bit_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "register_operand" "0")
+		     (match_operand:QI 2 "const1_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+  "rol{l}\t%k0"
+  [(set_attr "type" "rotate")
+   (set_attr "length" "2")])
+
+(define_insn "*rotlsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+	(rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATE, SImode, operands)"
+  "@
+   rol{l}\t{%2, %0|%0, %2}
+   rol{l}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rotlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "register_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+  "@
+   rol{l}\t{%2, %k0|%k0, %2}
+   rol{l}\t{%b2, %k0|%k0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "SI")])
+
+(define_expand "rotlhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
+
+(define_insn "*rotlhi3_1_one_bit"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATE, HImode, operands)"
+  "rol{w}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotlhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+	(rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATE, HImode, operands)"
+  "@
+   rol{w}\t{%2, %0|%0, %2}
+   rol{w}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+       (rotate:HI (match_dup 0) (const_int 8)))
+  (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (strict_low_part (match_dup 0))
+		  (bswap:HI (match_dup 0)))
+	     (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "rotlqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		   (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
+
+(define_insn "*rotlqi3_1_one_bit_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(rotate:QI (match_dup 0)
+		   (match_operand:QI 1 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+  "rol{b}\t%0"
+  [(set_attr "type" "rotate1")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotlqi3_1_one_bit"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		   (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATE, QImode, operands)"
+  "rol{b}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotlqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+	(rotate:QI (match_dup 0)
+		   (match_operand:QI 1 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   rol{b}\t{%1, %0|%0, %1}
+   rol{b}\t{%b1, %0|%0, %b1}"
+  [(set_attr "type" "rotate1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*rotlqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+	(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		   (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATE, QImode, operands)"
+  "@
+   rol{b}\t{%2, %0|%0, %2}
+   rol{b}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "QI")])
+
+(define_expand "rotrdi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+	(rotatert:DI (match_operand:DI 1 "shiftdi_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+{
+  if (TARGET_64BIT)
+    {
+      ix86_expand_binary_operator (ROTATERT, DImode, operands);
+      DONE;
+    }
+  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    FAIL;
+  emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
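+;; For illustration (hedged), the right-rotate analogue of the sequence
+;; above: rotating %edx:%eax right by a constant N comes out, roughly, as
+;;   movl  %eax, %ecx
+;;   shrdl $N, %edx, %eax      ; lo = (lo >> N) | (hi << (32-N))
+;;   shrdl $N, %ecx, %edx      ; hi = (hi >> N) | (saved lo << (32-N))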
+(define_insn_and_split "ix86_rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+         (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
+                 (ashift:SI (match_dup 5)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+         (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
+                 (ashift:SI (match_dup 3)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
+
+(define_insn "*rotrdi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+  "ror{q}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:DI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrdi3_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+	(rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "J,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+  "@
+   ror{q}\t{%2, %0|%0, %2}
+   ror{q}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "DI")])
+
+(define_expand "rotrsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
+
+(define_insn "*rotrsi3_1_one_bit"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+  "ror{l}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrsi3_1_one_bit_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+		       (match_operand:QI 2 "const1_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+  "ror{l}\t%k0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+	(rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+  "@
+   ror{l}\t{%2, %0|%0, %2}
+   ror{l}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rotrsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "register_operand" "0,0")
+		       (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+  "@
+   ror{l}\t{%2, %k0|%k0, %2}
+   ror{l}\t{%b2, %k0|%k0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "SI")])
+
+(define_expand "rotrhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
+
+(define_insn "*rotrhi3_one_bit"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
+  "ror{w}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+	(rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATERT, HImode, operands)"
+  "@
+   ror{w}\t{%2, %0|%0, %2}
+   ror{w}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+       (rotatert:HI (match_dup 0) (const_int 8)))
+  (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (strict_low_part (match_dup 0))
+		  (bswap:HI (match_dup 0)))
+	     (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "rotrqi3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
+
+(define_insn "*rotrqi3_1_one_bit"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+		     (match_operand:QI 2 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
+  "ror{b}\t%0"
+  [(set_attr "type" "rotate")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrqi3_1_one_bit_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(rotatert:QI (match_dup 0)
+		     (match_operand:QI 1 "const1_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+  "ror{b}\t%0"
+  [(set_attr "type" "rotate1")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+	(const_string "2")
+	(const_string "*")))])
+
+(define_insn "*rotrqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+	(rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+		     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ROTATERT, QImode, operands)"
+  "@
+   ror{b}\t{%2, %0|%0, %2}
+   ror{b}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "QI")])
+
+(define_insn "*rotrqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+	(rotatert:QI (match_dup 0)
+		     (match_operand:QI 1 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   ror{b}\t{%1, %0|%0, %1}
+   ror{b}\t{%b1, %0|%0, %b1}"
+  [(set_attr "type" "rotate1")
+   (set_attr "mode" "QI")])
+
+;; Bit set / bit test instructions
+
+(define_expand "extv"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(sign_extract:SI (match_operand:SI 1 "register_operand" "")
+			 (match_operand:SI 2 "const8_operand" "")
+			 (match_operand:SI 3 "const8_operand" "")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
+
+(define_expand "extzv"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extract:SI (match_operand 1 "ext_register_operand" "")
+			 (match_operand:SI 2 "const8_operand" "")
+			 (match_operand:SI 3 "const8_operand" "")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
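+;; For illustration (hedged): with x in %eax, the byte extractions these
+;; expanders handle are e.g.
+;;   (signed char) (x >> 8)    ->  movsbl %ah, %edx
+;;   (x >> 8) & 0xff           ->  movzbl %ah, %edx
+;; i.e. a single move from the high byte register instead of a shift
+;; plus an extension (register choice illustrative).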
+
+(define_expand "insv"
+  [(set (zero_extract (match_operand 0 "ext_register_operand" "")
+		      (match_operand 1 "const8_operand" "")
+		      (match_operand 2 "const8_operand" ""))
+        (match_operand 3 "register_operand" ""))]
+  ""
+{
+  /* Handle insertions to %ah et al.  */
+  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+    FAIL;
+
+  /* From mips.md: insert_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[0], VOIDmode))
+    FAIL;
+
+  if (TARGET_64BIT)
+    emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3]));
+  else
+    emit_insn (gen_movsi_insv_1 (operands[0], operands[3]));
+
+  DONE;
+})
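+;; For illustration (hedged): storing a byte into bits 8..15, as in
+;;   x = (x & ~0xff00) | ((y & 0xff) << 8);
+;; becomes a single "movb %dl, %ah" style insertion via the insv
+;; patterns referenced above (register choice illustrative).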
+
+;; %%% bts, btr, btc, bt.
+;; In general these instructions are *slow* when applied to memory,
+;; since they enforce atomic operation.  When applied to registers,
+;; it depends on the cpu implementation.  They're never faster than
+;; the corresponding and/ior/xor operations, so in 32-bit mode there's
+;; no point in using them.  But in 64-bit mode, we can't hold the
+;; relevant immediates within the instruction itself, so operating on
+;; bits in the high 32 bits of a register becomes easier.
+;;
+;; These are slow on Nocona, but fast on Athlon64.  We do require the use
+;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+;; negdf respectively, so they can never be disabled entirely.
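+;; For illustration (hedged): setting bit 40 of a 64-bit register, as in
+;;   x |= (1ULL << 40);
+;; cannot use "orq" with an immediate, since the mask does not fit in a
+;; sign-extended 32-bit immediate.  Without a free register it becomes
+;;   btsq $40, %rax
+;; while, when a scratch is available, the peepholes below prefer
+;;   movabsq $0x10000000000, %rdx
+;;   orq     %rdx, %rax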
+
+(define_insn "*btsq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+			 (const_int 1)
+			 (match_operand:DI 1 "const_0_to_63_operand" ""))
+	(const_int 1))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "bts{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+(define_insn "*btrq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+			 (const_int 1)
+			 (match_operand:DI 1 "const_0_to_63_operand" ""))
+	(const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "btr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+(define_insn "*btcq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+			 (const_int 1)
+			 (match_operand:DI 1 "const_0_to_63_operand" ""))
+	(not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "btc{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+;; Allow Nocona to avoid these instructions if a register is available.
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+		   (const_int 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  /* Build the mask 1 << i as a double-word constant; when the host
+     wide int is narrower than 64 bits, the bit may land in the high
+     word.  */
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  /* A mask with bit 31 or above set does not fit in a sign-extended
+     32-bit immediate and must be loaded into the scratch register.  */
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+		   (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (~lo, ~hi, DImode);
+  /* Unlike the bts/btc cases, ~(1 << 31) is 0x7fffffff and still fits
+     in a sign-extended 32-bit immediate, so the scratch register is
+     only needed for bits 32 and up.  */
+  if (i >= 32)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+		     (match_operand:DI 0 "register_operand" "")
+		     (const_int 1)
+		     (match_operand:DI 1 "const_0_to_63_operand" ""))
+	      (not:DI (zero_extract:DI
+			(match_dup 0) (const_int 1) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_insn "*btdi_rex64"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:DI
+	    (match_operand:DI 0 "register_operand" "r")
+	    (const_int 1)
+	    (match_operand:DI 1 "nonmemory_operand" "rN"))
+	  (const_int 0)))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "bt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+(define_insn "*btsi"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SI
+	    (match_operand:SI 0 "register_operand" "r")
+	    (const_int 1)
+	    (match_operand:SI 1 "nonmemory_operand" "rN"))
+	  (const_int 0)))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG.  Generate an equality comparison if `seq' or `sne'.
+
+;; %%% Do the expansion to SImode.  If the target suffers partial register
+;; stalls (e.g. PPro/PII), do things the xor+setcc way to avoid the stall.
+;; Otherwise do things the setcc+movzx way, which can later delete the
+;; movzx if only QImode is needed.
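+;; For illustration (hedged): for "int f (int a, int b) { return a < b; }"
+;; the two strategies come out, roughly, as
+;;   xorl %eax, %eax ; cmpl %edx, %ecx ; setl %al        (xor + setcc)
+;;   cmpl %edx, %ecx ; setl %al ; movzbl %al, %eax       (setcc + movzx)
+;; (register choice illustrative).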
+
+(define_expand "s<code>"
+  [(set (match_operand:QI 0 "register_operand" "")
+        (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+  ""
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_expand "s<code>"
+  [(set (match_operand:QI 0 "register_operand" "")
+        (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+  "TARGET_80387 || TARGET_SSE"
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_insn "*setcc_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+	(match_operator:QI 1 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+(define_insn "*setcc_2"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+	(match_operator:QI 1 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;;	seta	%al
+;;	testb	%al, %al
+;;	sete	%al
+
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(ne:QI (match_operator 1 "ix86_comparison_operator"
+	         [(reg FLAGS_REG) (const_int 0)])
+	    (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+	(ne:QI (match_operator 1 "ix86_comparison_operator"
+	         [(reg FLAGS_REG) (const_int 0)])
+	    (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(eq:QI (match_operator 1 "ix86_comparison_operator"
+	         [(reg FLAGS_REG) (const_int 0)])
+	    (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+					     GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+	(eq:QI (match_operator 1 "ix86_comparison_operator"
+	         [(reg FLAGS_REG) (const_int 0)])
+	    (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+					     GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+;; The SSE store-flag instructions save 0 or 0xffffffff to the result.
+;; Subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is a NaN, but not in normalized form, so we can't represent
+;; it directly.
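+;; For illustration (hedged): "r = a < b ? c : d" in SFmode can be
+;; emulated with the mask produced here, e.g.
+;;   cmpltss %xmm1, %xmm0      ; %xmm0 = (a < b) ? 0xffffffff : 0
+;;   movaps  %xmm0, %xmm4
+;;   andps   %xmm2, %xmm0      ; keep c where the mask is set
+;;   andnps  %xmm3, %xmm4      ; keep d where the mask is clear
+;;   orps    %xmm4, %xmm0
+;; (register choice illustrative).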
+
+(define_insn "*avx_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "avx_comparison_float_operator"
+	  [(match_operand:MODEF 2 "register_operand" "x")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_AVX"
+  "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "sse_comparison_operator"
+	  [(match_operand:MODEF 2 "register_operand" "0")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+  "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse5_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 1 "sse5_comparison_float_operator"
+	  [(match_operand:MODEF 2 "register_operand" "x")
+	   (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE5"
+  "com%Y1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "<MODE>")])
+
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+;; For all bCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG.  Generate an equality comparison if `beq' or `bne'.
+
+(define_expand "b<code>"
+  [(set (pc)
+	(if_then_else (int_cond:CC (reg:CC FLAGS_REG)
+				   (const_int 0))
+		      (label_ref (match_operand 0 ""))
+		      (pc)))]
+  ""
+  "ix86_expand_branch (<CODE>, operands[0]); DONE;")
+
+(define_expand "b<code>"
+  [(set (pc)
+	(if_then_else (fp_cond:CC (reg:CC FLAGS_REG)
+				  (const_int 0))
+		      (label_ref (match_operand 0 ""))
+		      (pc)))]
+  "TARGET_80387 || TARGET_SSE_MATH"
+  "ix86_expand_branch (<CODE>, operands[0]); DONE;")
+
+(define_insn "*jcc_1"
+  [(set (pc)
+	(if_then_else (match_operator 1 "ix86_comparison_operator"
+				      [(reg FLAGS_REG) (const_int 0)])
+		      (label_ref (match_operand 0 "" ""))
+		      (pc)))]
+  ""
+  "%+j%C1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+	   (if_then_else (and (ge (minus (match_dup 0) (pc))
+				  (const_int -126))
+			      (lt (minus (match_dup 0) (pc))
+				  (const_int 128)))
+	     (const_int 2)
+	     (const_int 6)))])
+
+(define_insn "*jcc_2"
+  [(set (pc)
+	(if_then_else (match_operator 1 "ix86_comparison_operator"
+				      [(reg FLAGS_REG) (const_int 0)])
+		      (pc)
+		      (label_ref (match_operand 0 "" ""))))]
+  ""
+  "%+j%c1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+	   (if_then_else (and (ge (minus (match_dup 0) (pc))
+				  (const_int -126))
+			      (lt (minus (match_dup 0) (pc))
+				  (const_int 128)))
+	     (const_int 2)
+	     (const_int 6)))])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;;	seta	%al
+;;	testb	%al, %al
+;;	je	Lfoo
+
+(define_split
+  [(set (pc)
+	(if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+				      [(reg FLAGS_REG) (const_int 0)])
+			  (const_int 0))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))]
+  ""
+  [(set (pc)
+	(if_then_else (match_dup 0)
+		      (label_ref (match_dup 1))
+		      (pc)))]
+{
+  PUT_MODE (operands[0], VOIDmode);
+})
+
+(define_split
+  [(set (pc)
+	(if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+				      [(reg FLAGS_REG) (const_int 0)])
+			  (const_int 0))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))]
+  ""
+  [(set (pc)
+	(if_then_else (match_dup 0)
+		      (label_ref (match_dup 1))
+		      (pc)))]
+{
+  rtx new_op0 = copy_rtx (operands[0]);
+  operands[0] = new_op0;
+  PUT_MODE (new_op0, VOIDmode);
+  PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+					     GET_MODE (XEXP (new_op0, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op0, VOIDmode))
+    FAIL;
+})
+
+;; zero_extend in SImode is correct, since this is what the combine pass
+;; generates from a shift insn with a QImode operand.  Actually, the mode
+;; of operand 2 (the bit offset operand) doesn't matter, since the bt
+;; insn implicitly reduces the bit offset modulo the operand width.
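+;; For illustration (hedged): both "(x >> n) & 1" and "(x >> (n & 31)) & 1"
+;; can branch through a single
+;;   btl %ecx, %eax
+;;   jc  .Llabel
+;; because bt already reduces a register bit offset modulo the operand
+;; width; the *_mask patterns below exist to discard the redundant "and".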
+
+(define_insn_and_split "*jcc_btdi_rex64"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:DI
+			   (match_operand:DI 1 "register_operand" "r")
+			   (const_int 1)
+			   (zero_extend:SI
+			     (match_operand:QI 2 "register_operand" "r")))
+			 (const_int 0)])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:DI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 3))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btdi_mask_rex64"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:DI
+			   (match_operand:DI 1 "register_operand" "r")
+			   (const_int 1)
+			   (and:SI
+			     (match_operand:SI 2 "register_operand" "r")
+			     (match_operand:SI 3 "const_int_operand" "n")))])
+		      (label_ref (match_operand 4 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x3f) == 0x3f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:DI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 4))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+(define_insn_and_split "*jcc_btsi"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:SI
+			   (match_operand:SI 1 "register_operand" "r")
+			   (const_int 1)
+			   (zero_extend:SI
+			     (match_operand:QI 2 "register_operand" "r")))
+			 (const_int 0)])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 3))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(zero_extract:SI
+			   (match_operand:SI 1 "register_operand" "r")
+			   (const_int 1)
+			   (and:SI
+			     (match_operand:SI 2 "register_operand" "r")
+			     (match_operand:SI 3 "const_int_operand" "n")))])
+		      (label_ref (match_operand 4 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 4))
+		      (pc)))]
+  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+(define_insn_and_split "*jcc_btsi_1"
+  [(set (pc)
+  	(if_then_else (match_operator 0 "bt_comparison_operator"
+			[(and:SI
+			   (lshiftrt:SI
+			     (match_operand:SI 1 "register_operand" "r")
+			     (match_operand:QI 2 "register_operand" "r"))
+			   (const_int 1))
+			 (const_int 0)])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 3))
+		      (pc)))]
+{
+  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask_1"
+  [(set (pc)
+  	(if_then_else
+	  (match_operator 0 "bt_comparison_operator"
+	    [(and:SI
+	       (lshiftrt:SI
+		 (match_operand:SI 1 "register_operand" "r")
+		 (subreg:QI
+		   (and:SI
+		     (match_operand:SI 2 "register_operand" "r")
+		     (match_operand:SI 3 "const_int_operand" "n")) 0))
+	       (const_int 1))
+	     (const_int 0)])
+	  (label_ref (match_operand 4 "" ""))
+	  (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (zero_extract:SI
+	    (match_dup 1)
+	    (const_int 1)
+	    (match_dup 2))
+	  (const_int 0)))
+   (set (pc)
+	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+		      (label_ref (match_dup 4))
+		      (pc)))]
+  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+;; Define combination compare-and-branch fp compare instructions to use
+;; during early optimization.  Splitting the operation apart early makes
+;; for bad code when we want to reverse the operation.
+
+(define_insn "*fp_jcc_1_mixed"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f,x")
+			 (match_operand 2 "nonimmediate_operand" "f,xm")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_1_sse"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "x")
+			 (match_operand 2 "nonimmediate_operand" "xm")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_1_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "register_operand" "f")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_2_mixed"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f,x")
+			 (match_operand 2 "nonimmediate_operand" "f,xm")])
+	  (pc)
+	  (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_2_sse"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "x")
+			 (match_operand 2 "nonimmediate_operand" "xm")])
+	  (pc)
+	  (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_2_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "register_operand" "f")])
+	  (pc)
+	  (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_3_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "nonimmediate_operand" "fm")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 4 "=a"))]
+  "TARGET_80387
+   && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+   && SELECT_CC_MODE (GET_CODE (operands[0]),
+		      operands[1], operands[2]) == CCFPmode
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_4_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "nonimmediate_operand" "fm")])
+	  (pc)
+	  (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 4 "=a"))]
+  "TARGET_80387
+   && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+   && SELECT_CC_MODE (GET_CODE (operands[0]),
+		      operands[1], operands[2]) == CCFPmode
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_5_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "register_operand" "f")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 4 "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_6_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "register_operand" "f")])
+	  (pc)
+	  (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 4 "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+(define_insn "*fp_jcc_7_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "f")
+			 (match_operand 2 "const0_operand" "")])
+	  (label_ref (match_operand 3 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 4 "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
+   && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+   && SELECT_CC_MODE (GET_CODE (operands[0]),
+		      operands[1], operands[2]) == CCFPmode
+   && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+  "#")
+
+;; The order of operands in *fp_jcc_8_387 is forced by combine in
+;; the simplify_comparison () function.  A float operator is treated as
+;; RTX_OBJ, taking precedence over other operators, and is therefore
+;; always placed first.  Swap the condition and operands to match the
+;; ficom instruction.
+
+(define_insn "*fp_jcc_8<mode>_387"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operator 1 "float_operator"
+			   [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
+			   (match_operand 3 "register_operand" "f,f")])
+	  (label_ref (match_operand 4 "" ""))
+	  (pc)))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 5 "=a,a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+   && GET_MODE (operands[1]) == GET_MODE (operands[3])
+   && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
+   && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
+   && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))"
+  "#")
+
+(define_split
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "")
+			 (match_operand 2 "nonimmediate_operand" "")])
+	  (match_operand 3 "" "")
+	  (match_operand 4 "" "")))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+	                operands[3], operands[4], NULL_RTX, NULL_RTX);
+  DONE;
+})
+
+(define_split
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand 1 "register_operand" "")
+			 (match_operand 2 "general_operand" "")])
+	  (match_operand 3 "" "")
+	  (match_operand 4 "" "")))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 5 "=a"))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+	     		operands[3], operands[4], operands[5], NULL_RTX);
+  DONE;
+})
+
+(define_split
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operator 1 "float_operator"
+			   [(match_operand:X87MODEI12 2 "memory_operand" "")])
+			   (match_operand 3 "register_operand" "")])
+	  (match_operand 4 "" "")
+	  (match_operand 5 "" "")))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 6 "=a"))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+  ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+			operands[3], operands[7],
+			operands[4], operands[5], operands[6], NULL_RTX);
+  DONE;
+})
+
+;; %%% Kill this when reload knows how to do it.
+(define_split
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operator 1 "float_operator"
+			   [(match_operand:X87MODEI12 2 "register_operand" "")])
+			   (match_operand 3 "register_operand" "")])
+	  (match_operand 4 "" "")
+	  (match_operand 5 "" "")))
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:CCFP FLAGS_REG))
+   (clobber (match_scratch:HI 6 "=a"))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+  operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+  ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+			operands[3], operands[7],
+			operands[4], operands[5], operands[6], operands[2]);
+  DONE;
+})
+
+;; Unconditional and other jump instructions
+
+(define_insn "jump"
+  [(set (pc)
+	(label_ref (match_operand 0 "" "")))]
+  ""
+  "jmp\t%l0"
+  [(set_attr "type" "ibr")
+   (set (attr "length")
+	   (if_then_else (and (ge (minus (match_dup 0) (pc))
+				  (const_int -126))
+			      (lt (minus (match_dup 0) (pc))
+				  (const_int 128)))
+	     (const_int 2)
+	     (const_int 5)))
+   (set_attr "modrm" "0")])
+
+(define_expand "indirect_jump"
+  [(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
+  ""
+  "")
+
+(define_insn "*indirect_jump"
+  [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
+  ""
+  "jmp\t%A0"
+  [(set_attr "type" "ibr")
+   (set_attr "length_immediate" "0")])
+
+(define_expand "tablejump"
+  [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" ""))
+	      (use (label_ref (match_operand 1 "" "")))])]
+  ""
+{
+  /* In PIC mode, the table entries are stored GOT-relative (32-bit) or
+     PC-relative (64-bit).  Convert the relative address to an absolute
+     address.  */
+  if (flag_pic)
+    {
+      rtx op0, op1;
+      enum rtx_code code;
+
+      /* We can't use @GOTOFF for text labels on VxWorks;
+	 see gotoff_operand.  */
+      if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+	{
+	  code = PLUS;
+	  op0 = operands[0];
+	  op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+	}
+      else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
+	{
+	  code = PLUS;
+	  op0 = operands[0];
+	  op1 = pic_offset_table_rtx;
+	}
+      else
+	{
+	  code = MINUS;
+	  op0 = pic_offset_table_rtx;
+	  op1 = operands[0];
+	}
+
+      operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
+					 OPTAB_DIRECT);
+    }
+})
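+;; For illustration (hedged): with 32-bit PIC and @GOTOFF table entries
+;; the dispatch becomes, roughly,
+;;   movl table@GOTOFF(%ebx,%eax,4), %edx
+;;   addl %ebx, %edx
+;;   jmp  *%edx
+;; i.e. the GOT-relative table entry plus the GOT base recovers the
+;; absolute target address.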
+
+(define_insn "*tablejump_1"
+  [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))
+   (use (label_ref (match_operand 1 "" "")))]
+  ""
+  "jmp\t%A0"
+  [(set_attr "type" "ibr")
+   (set_attr "length_immediate" "0")])
+
+;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
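+;; For illustration (hedged): the peephole turns
+;;   cmpl %edx, %ecx ; setg %al ; movzbl %al, %eax
+;; into
+;;   xorl %eax, %eax ; cmpl %edx, %ecx ; setg %al
+;; clearing the destination before the compare, so no partial-register
+;; stall (and no movzx) is needed, provided the setcc destination is not
+;; an input of the compare.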
+
+(define_peephole2
+  [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+   (set (match_operand:QI 1 "register_operand" "")
+	(match_operator:QI 2 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))
+   (set (match_operand 3 "q_regs_operand" "")
+	(zero_extend (match_dup 1)))]
+  "(peep2_reg_dead_p (3, operands[1])
+    || operands_match_p (operands[1], operands[3]))
+   && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+  [(set (match_dup 4) (match_dup 0))
+   (set (strict_low_part (match_dup 5))
+	(match_dup 2))]
+{
+  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+  operands[5] = gen_lowpart (QImode, operands[3]);
+  ix86_expand_clear (operands[3]);
+})
+
+;; Similar, but match zero_extendhisi2_and, which adds a clobber.
+
+(define_peephole2
+  [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+   (set (match_operand:QI 1 "register_operand" "")
+	(match_operator:QI 2 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))
+   (parallel [(set (match_operand 3 "q_regs_operand" "")
+		   (zero_extend (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(peep2_reg_dead_p (3, operands[1])
+    || operands_match_p (operands[1], operands[3]))
+   && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+  [(set (match_dup 4) (match_dup 0))
+   (set (strict_low_part (match_dup 5))
+	(match_dup 2))]
+{
+  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+  operands[5] = gen_lowpart (QImode, operands[3]);
+  ix86_expand_clear (operands[3]);
+})
+
+;; Call instructions.
+
+;; The predicates normally associated with named expanders are not properly
+;; checked for calls.  This is a bug in the generic code, but it isn't that
+;; easy to fix.  Ignore it for now and be prepared to fix things up.
+
+;; Call subroutine returning no value.
+
+(define_expand "call_pop"
+  [(parallel [(call (match_operand:QI 0 "" "")
+		    (match_operand:SI 1 "" ""))
+	      (set (reg:SI SP_REG)
+		   (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 3 "" "")))])]
+  "!TARGET_64BIT"
+{
+  ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0);
+  DONE;
+})
+
+(define_insn "*call_pop_0"
+  [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+	 (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 2 "immediate_operand" "")))]
+  "!TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P0";
+  else
+    return "call\t%P0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*call_pop_1"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+	 (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 2 "immediate_operand" "i")))]
+  "!TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    {
+      if (SIBLING_CALL_P (insn))
+	return "jmp\t%P0";
+      else
+	return "call\t%P0";
+    }
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%A0";
+  else
+    return "call\t%A0";
+}
+  [(set_attr "type" "call")])
+
+(define_expand "call"
+  [(call (match_operand:QI 0 "" "")
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))]
+  ""
+{
+  ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0);
+  DONE;
+})
+
+(define_expand "sibcall"
+  [(call (match_operand:QI 0 "" "")
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))]
+  ""
+{
+  ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1);
+  DONE;
+})
+
+(define_insn "*call_0"
+  [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+	 (match_operand 1 "" ""))]
+  ""
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P0";
+  else
+    return "call\t%P0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*call_1"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+	 (match_operand 1 "" ""))]
+  "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    return "call\t%P0";
+  return "call\t%A0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a"))
+	 (match_operand 1 "" ""))]
+  "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    return "jmp\t%P0";
+  return "jmp\t%A0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	 (match_operand 1 "" ""))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    return "call\t%P0";
+  return "call\t%A0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_ms_sysv"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	 (match_operand 1 "" ""))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:TI XMM6_REG))
+   (clobber (reg:TI XMM7_REG))
+   (clobber (reg:TI XMM8_REG))
+   (clobber (reg:TI XMM9_REG))
+   (clobber (reg:TI XMM10_REG))
+   (clobber (reg:TI XMM11_REG))
+   (clobber (reg:TI XMM12_REG))
+   (clobber (reg:TI XMM13_REG))
+   (clobber (reg:TI XMM14_REG))
+   (clobber (reg:TI XMM15_REG))
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    return "call\t%P0";
+  return "call\t%A0";
+}
+  [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_large"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+	 (match_operand 1 "" ""))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "call\t%A0"
+  [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64"
+  [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" ""))
+	 (match_operand 1 "" ""))]
+  "SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "jmp\t%P0"
+  [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64_v"
+  [(call (mem:QI (reg:DI R11_REG))
+	 (match_operand 0 "" ""))]
+  "SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "jmp\t{*%%}r11"
+  [(set_attr "type" "call")])
+
+
+;; Call subroutine, returning value in operand 0.
+
+(define_expand "call_value_pop"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand:QI 1 "" "")
+			 (match_operand:SI 2 "" "")))
+	      (set (reg:SI SP_REG)
+		   (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 4 "" "")))])]
+  "!TARGET_64BIT"
+{
+  ix86_expand_call (operands[0], operands[1], operands[2],
+		    operands[3], operands[4], 0);
+  DONE;
+})
+
+(define_expand "call_value"
+  [(set (match_operand 0 "" "")
+	(call (match_operand:QI 1 "" "")
+	      (match_operand:SI 2 "" "")))
+   (use (match_operand:SI 3 "" ""))]
+  ;; Operand 2 not used on the i386.
+  ""
+{
+  ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0);
+  DONE;
+})
+
+(define_expand "sibcall_value"
+  [(set (match_operand 0 "" "")
+	(call (match_operand:QI 1 "" "")
+	      (match_operand:SI 2 "" "")))
+   (use (match_operand:SI 3 "" ""))]
+  ;; Operand 2 not used on the i386.
+  ""
+{
+  ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1);
+  DONE;
+})
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+  [(parallel [(call (match_operand 0 "" "")
+		    (const_int 0))
+	      (match_operand 1 "" "")
+	      (match_operand 2 "" "")])]
+  ""
+{
+  int i;
+
+  /* In order to give reg-stack an easier job in validating two
+     coprocessor registers as containing a possible return value,
+     simply pretend the untyped call returns a complex long double
+     value.
+
+     We can't use SSE_REGPARM_MAX here since the callee is unprototyped
+     and should have the default ABI.  */
+
+  ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
+		     ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
+		    operands[0], const0_rtx,
+		    GEN_INT ((TARGET_64BIT
+			      ? (DEFAULT_ABI == SYSV_ABI
+				 ? X86_64_SSE_REGPARM_MAX
+				 : X64_SSE_REGPARM_MAX)
+			      : X86_32_SSE_REGPARM_MAX)
+		    	     - 1),
+		    NULL, 0);
+
+  for (i = 0; i < XVECLEN (operands[2], 0); i++)
+    {
+      rtx set = XVECEXP (operands[2], 0, i);
+      emit_move_insn (SET_DEST (set), SET_SRC (set));
+    }
+
+  /* The optimizer does not know that the call sets the function value
+     registers we stored in the result block.  We avoid problems by
+     claiming that all hard registers are used and clobbered at this
+     point.  */
+  emit_insn (gen_blockage ());
+
+  DONE;
+})
+
+;; Prologue and epilogue instructions
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory.  This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+  ""
+  ""
+  [(set_attr "length" "0")])
+
+;; Do not schedule instructions accessing memory across this point.
+
+(define_expand "memory_blockage"
+  [(set (match_dup 0)
+	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+  ""
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_blockage"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+  ""
+  ""
+  [(set_attr "length" "0")])
+
+;; As USE insns aren't meaningful after reload, this is used instead
+;; to prevent deletion of instructions that set up registers for PIC code.
+(define_insn "prologue_use"
+  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)]
+  ""
+  ""
+  [(set_attr "length" "0")])
+
+;; Insn emitted into the body of a function to return from a function.
+;; This is only done if the function's epilogue is known to be simple.
+;; See comments for ix86_can_use_return_insn_p in i386.c.
+
+(define_expand "return"
+  [(return)]
+  "ix86_can_use_return_insn_p ()"
+{
+  if (crtl->args.pops_args)
+    {
+      rtx popc = GEN_INT (crtl->args.pops_args);
+      emit_jump_insn (gen_return_pop_internal (popc));
+      DONE;
+    }
+})
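+
+;; For illustration, a function that pops its own stack arguments
+;; (crtl->args.pops_args nonzero, e.g. stdcall with 8 bytes of args)
+;; returns through return_pop_internal below as:
+;;   ret $8           # pop the return address, then 8 argument bytes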
+
+(define_insn "return_internal"
+  [(return)]
+  "reload_completed"
+  "ret"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
+;; Used by x86_machine_dependent_reorg to avoid the penalty Athlon and
+;; K8 incur on a single-byte RET: the REP prefix is ignored by RET but
+;; pads the instruction to two bytes.
+
+(define_insn "return_internal_long"
+  [(return)
+   (unspec [(const_int 0)] UNSPEC_REP)]
+  "reload_completed"
+  "rep\;ret"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "prefix_rep" "1")
+   (set_attr "modrm" "0")])
+
+(define_insn "return_pop_internal"
+  [(return)
+   (use (match_operand:SI 0 "const_int_operand" ""))]
+  "reload_completed"
+  "ret\t%0"
+  [(set_attr "length" "3")
+   (set_attr "length_immediate" "2")
+   (set_attr "modrm" "0")])
+
+(define_insn "return_indirect_internal"
+  [(return)
+   (use (match_operand:SI 0 "register_operand" "r"))]
+  "reload_completed"
+  "jmp\t%A0"
+  [(set_attr "type" "ibr")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
+;; Align to a 16-byte boundary, with max skip in op0.  Used to avoid
+;; the branch-prediction penalty for the third jump in a 16-byte
+;; block on K8.
+
+(define_insn "align"
+  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+  ""
+{
+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+  ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
+#else
+  /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to
+     do that.  The align insn is used to avoid three jump instructions
+     in a row, which improves branch prediction; that benefit hardly
+     outweighs the cost of the eight extra nops, on average, that a
+     full alignment pseudo-op would insert.  */
+#endif
+  return "";
+}
+  [(set_attr "length" "16")])
+
+(define_expand "prologue"
+  [(const_int 0)]
+  ""
+  "ix86_expand_prologue (); DONE;")
+
+(define_insn "set_got"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  { return output_set_got (operands[0], NULL_RTX); }
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_insn "set_got_labelled"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(label_ref (match_operand 1 "" ""))]
+	 UNSPEC_SET_GOT))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  { return output_set_got (operands[0], operands[1]); }
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_insn "set_got_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
+  "TARGET_64BIT"
+  "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "length" "6")])
+
+(define_insn "set_rip_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))]
+  "TARGET_64BIT"
+  "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "length" "6")])
+
+(define_insn "set_got_offset_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI
+	  [(label_ref (match_operand 1 "" ""))]
+	  UNSPEC_SET_GOT_OFFSET))]
+  "TARGET_64BIT"
+  "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
+  [(set_attr "type" "imov")
+   (set_attr "length" "11")])
+
+(define_expand "epilogue"
+  [(const_int 0)]
+  ""
+  "ix86_expand_epilogue (1); DONE;")
+
+(define_expand "sibcall_epilogue"
+  [(const_int 0)]
+  ""
+  "ix86_expand_epilogue (0); DONE;")
+
+(define_expand "eh_return"
+  [(use (match_operand 0 "register_operand" ""))]
+  ""
+{
+  rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
+
+  /* Tricky bit: we write the address of the handler to which we will
+     be returning into someone else's stack frame, one word below the
+     stack address we wish to restore.  */
+  tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
+  tmp = plus_constant (tmp, -UNITS_PER_WORD);
+  tmp = gen_rtx_MEM (Pmode, tmp);
+  emit_move_insn (tmp, ra);
+
+  if (Pmode == SImode)
+    emit_jump_insn (gen_eh_return_si (sa));
+  else
+    emit_jump_insn (gen_eh_return_di (sa));
+  emit_barrier ();
+  DONE;
+})
+
+(define_insn_and_split "eh_return_<mode>"
+  [(set (pc)
+        (unspec [(match_operand:P 0 "register_operand" "c")]
+	         UNSPEC_EH_RETURN))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  "ix86_expand_epilogue (2); DONE;")
+
+(define_insn "leave"
+  [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
+   (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
+   (clobber (mem:BLK (scratch)))]
+  "!TARGET_64BIT"
+  "leave"
+  [(set_attr "type" "leave")])
+
+(define_insn "leave_rex64"
+  [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
+   (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+  "leave"
+  [(set_attr "type" "leave")])
+
+(define_expand "ffssi2"
+  [(parallel
+     [(set (match_operand:SI 0 "register_operand" "")
+	   (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+      (clobber (match_scratch:SI 2 ""))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (TARGET_CMOVE)
+    {
+      emit_insn (gen_ffs_cmove (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_expand "ffs_cmove"
+  [(set (match_dup 2) (const_int -1))
+   (parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "")
+				(const_int 0)))
+	      (set (match_operand:SI 0 "register_operand" "")
+		   (ctz:SI (match_dup 1)))])
+   (set (match_dup 0) (if_then_else:SI
+			(eq (reg:CCZ FLAGS_REG) (const_int 0))
+			(match_dup 2)
+			(match_dup 0)))
+   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_CMOVE"
+  "operands[2] = gen_reg_rtx (SImode);")
+
+(define_insn_and_split "*ffs_no_cmove"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+   (clobber (match_scratch:SI 2 "=&q"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_CMOVE"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (match_dup 1) (const_int 0)))
+	      (set (match_dup 0) (ctz:SI (match_dup 1)))])
+   (set (strict_low_part (match_dup 3))
+	(eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
+   (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[3] = gen_lowpart (QImode, operands[2]);
+  ix86_expand_clear (operands[2]);
+})
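+
+;; The split above builds a branch-free ffs without cmov; roughly:
+;;   tmp = 0;
+;;   r = bsf (x);            # sets ZF when x == 0; r is undefined then
+;;   tmp_lowbyte = (x == 0); # sete
+;;   tmp = -tmp;             # 0, or -1 when x was 0
+;;   r |= tmp;               # force r to -1 when x was 0
+;;   r += 1;                 # ffs (0) == 0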
+
+(define_insn "*ffssi_1"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (const_int 0)))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(ctz:SI (match_dup 1)))]
+  ""
+  "bsf{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_expand "ffsdi2"
+  [(set (match_dup 2) (const_int -1))
+   (parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "")
+				(const_int 0)))
+	      (set (match_operand:DI 0 "register_operand" "")
+		   (ctz:DI (match_dup 1)))])
+   (set (match_dup 0) (if_then_else:DI
+			(eq (reg:CCZ FLAGS_REG) (const_int 0))
+			(match_dup 2)
+			(match_dup 0)))
+   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "operands[2] = gen_reg_rtx (DImode);")
+
+(define_insn "*ffsdi_1"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
+		     (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(ctz:DI (match_dup 1)))]
+  "TARGET_64BIT"
+  "bsf{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsf{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzdi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "bsf{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")])
+
+(define_expand "clzsi2"
+  [(parallel
+     [(set (match_operand:SI 0 "register_operand" "")
+	   (minus:SI (const_int 31)
+		     (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
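+
+;; For nonzero x, bsr returns the index i of the highest set bit, so
+;; i == 31 - clz (x).  Since 0 <= i <= 31, 31 - i == 31 ^ i, and the
+;; expander computes clz as bsr followed by an xor with 31:
+;;   r = bsr (x);     # 31 - clz (x)
+;;   r ^= 31;         # clz (x)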
+
+(define_insn "clzsi2_abm"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+(define_insn "*bsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(minus:SI (const_int 31)
+		  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsr{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")])
+
+(define_insn "popcount<mode>2"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(popcount:SWI248
+	  (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcount<mode>2_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (popcount:SWI248
+	    (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
+	  (const_int 0)))
+   (set (match_operand:SWI248 0 "register_operand" "=r")
+	(popcount:SWI248 (match_dup 1)))]
+  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcountsi2_cmp_zext"
+  [(set (reg FLAGS_REG)
+        (compare
+          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (popcount:SI (match_dup 1))))]
+  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+(define_expand "bswapsi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(bswap:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+{
+  if (!TARGET_BSWAP)
+    {
+      rtx x = operands[0];
+
+      emit_move_insn (x, operands[1]);
+      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+      emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
+      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+      DONE;
+    }
+})
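+
+;; Without BSWAP the expander synthesizes the 32-bit byte swap from two
+;; 16-bit lowpart swaps around a 16-bit rotate.  For bytes ABCD:
+;;   swap low half:   ABCD -> ABDC   (xchg %al, %ah or rolw $8)
+;;   rotate by 16:    ABDC -> DCAB
+;;   swap low half:   DCAB -> DCBA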
+
+(define_insn "*bswapsi_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(bswap:SI (match_operand:SI 1 "register_operand" "0")))]
+  "TARGET_BSWAP"
+  "bswap\t%0"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "length" "2")])
+
+(define_insn "*bswaphi_lowpart_1"
+  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
+	(bswap:HI (match_dup 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)"
+  "@
+    xchg{b}\t{%h0, %b0|%b0, %h0}
+    rol{w}\t{$8, %0|%0, 8}"
+  [(set_attr "length" "2,4")
+   (set_attr "mode" "QI,HI")])
+
+(define_insn "bswaphi_lowpart"
+  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+	(bswap:HI (match_dup 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "rol{w}\t{$8, %0|%0, 8}"
+  [(set_attr "length" "4")
+   (set_attr "mode" "HI")])
+
+(define_insn "bswapdi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(bswap:DI (match_operand:DI 1 "register_operand" "0")))]
+  "TARGET_64BIT"
+  "bswap\t%0"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "length" "3")])
+
+(define_expand "clzdi2"
+  [(parallel
+     [(set (match_operand:DI 0 "register_operand" "")
+	   (minus:DI (const_int 63)
+		     (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
+      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "clzdi2_abm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_ABM"
+  "lzcnt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "DI")])
+
+(define_insn "*bsr_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI (const_int 63)
+		  (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "bsr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
+
+(define_expand "clzhi2"
+  [(parallel
+     [(set (match_operand:HI 0 "register_operand" "")
+	   (minus:HI (const_int 15)
+		     (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "clzhi2_abm"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
+(define_insn "*bsrhi"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(minus:HI (const_int 15)
+		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsr{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "HI")])
+
+(define_expand "paritydi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(parity:DI (match_operand:DI 1 "register_operand" "")))]
+  "! TARGET_POPCNT"
+{
+  rtx scratch = gen_reg_rtx (QImode);
+  rtx cond;
+
+  emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
+				NULL_RTX, operands[1]));
+
+  cond = gen_rtx_fmt_ee (ORDERED, QImode,
+			 gen_rtx_REG (CCmode, FLAGS_REG),
+			 const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+  if (TARGET_64BIT)
+    emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
+  else
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extendqisi2 (tmp, scratch));
+      emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
+    }
+  DONE;
+})
+
+(define_insn_and_split "paritydi2_cmp"
+  [(set (reg:CC FLAGS_REG)
+	(parity:CC (match_operand:DI 3 "register_operand" "0")))
+   (clobber (match_scratch:DI 0 "=r"))
+   (clobber (match_scratch:SI 1 "=&r"))
+   (clobber (match_scratch:HI 2 "=Q"))]
+  "! TARGET_POPCNT"
+  "#"
+  "&& reload_completed"
+  [(parallel
+     [(set (match_dup 1)
+	   (xor:SI (match_dup 1) (match_dup 4)))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (reg:CC FLAGS_REG)
+	   (parity:CC (match_dup 1)))
+      (clobber (match_dup 1))
+      (clobber (match_dup 2))])]
+{
+  operands[4] = gen_lowpart (SImode, operands[3]);
+
+  if (TARGET_64BIT)
+    {
+      emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
+      emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
+    }
+  else
+    operands[1] = gen_highpart (SImode, operands[3]);
+})
+
+(define_expand "paritysi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(parity:SI (match_operand:SI 1 "register_operand" "")))]
+  "! TARGET_POPCNT"
+{
+  rtx scratch = gen_reg_rtx (QImode);
+  rtx cond;
+
+  emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
+
+  cond = gen_rtx_fmt_ee (ORDERED, QImode,
+			 gen_rtx_REG (CCmode, FLAGS_REG),
+			 const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+  DONE;
+})
+
+(define_insn_and_split "paritysi2_cmp"
+  [(set (reg:CC FLAGS_REG)
+	(parity:CC (match_operand:SI 2 "register_operand" "0")))
+   (clobber (match_scratch:SI 0 "=r"))
+   (clobber (match_scratch:HI 1 "=&Q"))]
+  "! TARGET_POPCNT"
+  "#"
+  "&& reload_completed"
+  [(parallel
+     [(set (match_dup 1)
+	   (xor:HI (match_dup 1) (match_dup 3)))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (reg:CC FLAGS_REG)
+	   (parity:CC (match_dup 1)))
+      (clobber (match_dup 1))])]
+{
+  operands[3] = gen_lowpart (HImode, operands[2]);
+
+  emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
+  emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
+})
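+
+;; The parity of a word is invariant under xor-folding its halves:
+;;   parity (x) == parity ((x >> 16) ^ (x & 0xffff))
+;; so the splits above keep halving the operand; the final HImode step
+;; below xors the remaining two bytes, which sets PF, and the ORDERED
+;; test in the expanders (setnp) then yields 1 exactly for odd parity.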
+
+(define_insn "*parityhi2_cmp"
+  [(set (reg:CC FLAGS_REG)
+	(parity:CC (match_operand:HI 1 "register_operand" "0")))
+   (clobber (match_scratch:HI 0 "=Q"))]
+  "! TARGET_POPCNT"
+  "xor{b}\t{%h0, %b0|%b0, %h0}"
+  [(set_attr "length" "2")
+   (set_attr "mode" "HI")])
+
+(define_insn "*parityqi2_cmp"
+  [(set (reg:CC FLAGS_REG)
+	(parity:CC (match_operand:QI 0 "register_operand" "q")))]
+  "! TARGET_POPCNT"
+  "test{b}\t%0, %0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "QI")])
+
+;; Thread-local storage patterns for ELF.
+;;
+;; Note that these code sequences must appear exactly as shown
+;; in order to allow linker relaxation.
+
+(define_insn "*tls_global_dynamic_32_gnu"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
+		    (match_operand:SI 2 "tls_symbolic_operand" "")
+		    (match_operand:SI 3 "call_insn_operand" "")]
+		    UNSPEC_TLS_GD))
+   (clobber (match_scratch:SI 4 "=d"))
+   (clobber (match_scratch:SI 5 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU_TLS"
+  "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3"
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_insn "*tls_global_dynamic_32_sun"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
+		    (match_operand:SI 2 "tls_symbolic_operand" "")
+		    (match_operand:SI 3 "call_insn_operand" "")]
+		    UNSPEC_TLS_GD))
+   (clobber (match_scratch:SI 4 "=d"))
+   (clobber (match_scratch:SI 5 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_SUN_TLS"
+  "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]}
+	push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop"
+  [(set_attr "type" "multi")
+   (set_attr "length" "14")])
+
+(define_expand "tls_global_dynamic_32"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (unspec:SI
+		    [(match_dup 2)
+		     (match_operand:SI 1 "tls_symbolic_operand" "")
+		     (match_dup 3)]
+		    UNSPEC_TLS_GD))
+	      (clobber (match_scratch:SI 4 ""))
+	      (clobber (match_scratch:SI 5 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (flag_pic)
+    operands[2] = pic_offset_table_rtx;
+  else
+    {
+      operands[2] = gen_reg_rtx (Pmode);
+      emit_insn (gen_set_got (operands[2]));
+    }
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32
+		  (operands[0], operands[1], operands[2]));
+       DONE;
+    }
+  operands[3] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_global_dynamic_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+		 (match_operand:DI 3 "" "")))
+   (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+	      UNSPEC_TLS_GD)]
+  "TARGET_64BIT"
+  ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\;.word\t0x6666\;rex64\;call\t%P2"
+  [(set_attr "type" "multi")
+   (set_attr "length" "16")])
+
+(define_expand "tls_global_dynamic_64"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (call:DI (mem:QI (match_dup 2)) (const_int 0)))
+	      (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+			 UNSPEC_TLS_GD)])]
+  ""
+{
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64
+		  (operands[0], operands[1]));
+       DONE;
+    }
+  operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_32_gnu"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
+                    (match_operand:SI 2 "call_insn_operand" "")]
+		   UNSPEC_TLS_LD_BASE))
+   (clobber (match_scratch:SI 3 "=d"))
+   (clobber (match_scratch:SI 4 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU_TLS"
+  "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2"
+  [(set_attr "type" "multi")
+   (set_attr "length" "11")])
+
+(define_insn "*tls_local_dynamic_base_32_sun"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "b")
+                    (match_operand:SI 2 "call_insn_operand" "")]
+		   UNSPEC_TLS_LD_BASE))
+   (clobber (match_scratch:SI 3 "=d"))
+   (clobber (match_scratch:SI 4 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_SUN_TLS"
+  "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]}
+	push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3"
+  [(set_attr "type" "multi")
+   (set_attr "length" "13")])
+
+(define_expand "tls_local_dynamic_base_32"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (unspec:SI [(match_dup 1) (match_dup 2)]
+			      UNSPEC_TLS_LD_BASE))
+	      (clobber (match_scratch:SI 3 ""))
+	      (clobber (match_scratch:SI 4 ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (flag_pic)
+    operands[1] = pic_offset_table_rtx;
+  else
+    {
+      operands[1] = gen_reg_rtx (Pmode);
+      emit_insn (gen_set_got (operands[1]));
+    }
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32
+		  (operands[0], ix86_tls_module_base (), operands[1]));
+       DONE;
+    }
+  operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+		 (match_operand:DI 2 "" "")))
+   (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+  "TARGET_64BIT"
+  "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1"
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (call:DI (mem:QI (match_dup 1)) (const_int 0)))
+	      (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+  ""
+{
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64
+		  (operands[0], ix86_tls_module_base ()));
+       DONE;
+    }
+  operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic of a single variable is a loss.  Show combine how
+;; to convert that back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+			     (match_operand:SI 2 "call_insn_operand" "")]
+			    UNSPEC_TLS_LD_BASE)
+		 (const:SI (unspec:SI
+			    [(match_operand:SI 3 "tls_symbolic_operand" "")]
+			    UNSPEC_DTPOFF))))
+   (clobber (match_scratch:SI 4 "=d"))
+   (clobber (match_scratch:SI 5 "=c"))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+		   (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+			      UNSPEC_TLS_GD))
+	      (clobber (match_dup 4))
+	      (clobber (match_dup 5))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+;; Load and add the thread base pointer from %gs:0 (%fs:0 in 64-bit mode).
+
+(define_insn "*load_tp_si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(const_int 0)] UNSPEC_TP))]
+  "!TARGET_64BIT"
+  "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+		 (match_operand:SI 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+  [(set_attr "type" "alu")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+(define_insn "*load_tp_di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(const_int 0)] UNSPEC_TP))]
+  "TARGET_64BIT"
+  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		 (match_operand:DI 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
+  [(set_attr "type" "alu")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+(define_expand "tls_dynamic_gnu2_32"
+  [(set (match_dup 3)
+	(plus:SI (match_operand:SI 2 "register_operand" "")
+		 (const:SI
+		  (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+			     UNSPEC_TLSDESC))))
+   (parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (unspec:SI [(match_dup 1) (match_dup 3)
+		      (match_dup 2) (reg:SI SP_REG)]
+		      UNSPEC_TLSDESC))
+     (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_operand:SI 1 "register_operand" "b")
+		 (const:SI
+		  (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+			      UNSPEC_TLSDESC))))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")
+   (set_attr "length" "6")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+		    (match_operand:SI 2 "register_operand" "0")
+		    ;; we have to make sure %ebx still points to the GOT
+		    (match_operand:SI 3 "register_operand" "b")
+		    (reg:SI SP_REG)]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+  [(set (match_operand:SI 0 "register_operand" "=&a")
+	(plus:SI
+	 (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")
+		     (match_operand:SI 4 "" "")
+		     (match_operand:SI 2 "register_operand" "b")
+		     (reg:SI SP_REG)]
+		    UNSPEC_TLSDESC)
+	 (const:SI (unspec:SI
+		    [(match_operand:SI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 5))]
+{
+  operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+  [(set (match_dup 2)
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))
+   (parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+	  (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)]
+		     UNSPEC_TLSDESC))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+		    (match_operand:DI 2 "register_operand" "0")
+		    (reg:DI SP_REG)]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=&a")
+	(plus:DI
+	 (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+		     (match_operand:DI 3 "" "")
+		     (reg:DI SP_REG)]
+		    UNSPEC_TLSDESC)
+	 (const:DI (unspec:DI
+		    [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 4))]
+{
+  operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3.  There are three patterns for each of DFmode and
+;; SFmode.  The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion.  The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target
+;; mode is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address
+;; instructions, so use special patterns for add and mul.
+
+(define_insn "*fop_<mode>_comm_mixed_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(if_then_else (eq_attr "alternative" "1")
+	   (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	      (const_string "ssemul")
+	      (const_string "sseadd"))
+	   (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	      (const_string "fmul")
+	      (const_string "fop"))))
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(if_then_else (eq_attr "alternative" "1")
+	   (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	      (const_string "ssemul")
+	      (const_string "sseadd"))
+	   (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	      (const_string "fmul")
+	      (const_string "fop"))))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "%x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	   (const_string "ssemul")
+	   (const_string "sseadd")))
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	   (const_string "ssemul")
+	   (const_string "sseadd")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))]
+  "TARGET_80387
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+	(if_then_else (match_operand:MODEF 3 "mult_operator" "")
+	   (const_string "fmul")
+	   (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+   && !COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(and (eq_attr "alternative" "2")
+	            (match_operand:MODEF 3 "mult_operator" ""))
+                 (const_string "ssemul")
+	       (and (eq_attr "alternative" "2")
+	            (match_operand:MODEF 3 "div_operator" ""))
+                 (const_string "ssediv")
+	       (eq_attr "alternative" "2")
+                 (const_string "sseadd")
+	       (match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+   && !COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(and (eq_attr "alternative" "2")
+	            (match_operand:MODEF 3 "mult_operator" ""))
+                 (const_string "ssemul")
+	       (and (eq_attr "alternative" "2")
+	            (match_operand:MODEF 3 "div_operator" ""))
+                 (const_string "ssediv")
+	       (eq_attr "alternative" "2")
+                 (const_string "sseadd")
+	       (match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*rcpsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+		   UNSPEC_RCP))]
+  "TARGET_SSE_MATH"
+  "%vrcpss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+(define_insn "*fop_<mode>_1_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "register_operand" "x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "ssemul")
+	       (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "ssediv")
+              ]
+              (const_string "sseadd")))
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "ssemul")
+	       (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "ssediv")
+              ]
+              (const_string "sseadd")))
+   (set_attr "mode" "<MODE>")])
+
+;; This pattern is not fully shadowed by the pattern above.
+(define_insn "*fop_<mode>_1_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   && !COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+;; ??? Add SSE splitters for these!
+(define_insn "*fop_<MODEF:mode>_2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(float:MODEF
+	     (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+	   (match_operand:MODEF 2 "register_operand" "0,0")]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<X87MODEI12:MODE>")])
+
+(define_insn "*fop_<MODEF:mode>_3_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+	(match_operator:MODEF 3 "binary_fp_operator"
+	  [(match_operand:MODEF 1 "register_operand" "0,0")
+	   (float:MODEF
+	     (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_4_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+	(match_operator:DF 3 "binary_fp_operator"
+	   [(float_extend:DF
+	     (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+	    (match_operand:DF 2 "register_operand" "0,f")]))]
+  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_5_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+	(match_operator:DF 3 "binary_fp_operator"
+	  [(match_operand:DF 1 "register_operand" "0,f")
+	   (float_extend:DF
+	    (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_6_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+	(match_operator:DF 3 "binary_fp_operator"
+	  [(float_extend:DF
+	    (match_operand:SF 1 "register_operand" "0,f"))
+	   (float_extend:DF
+	    (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_comm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(match_operator:XF 3 "binary_fp_operator"
+			[(match_operand:XF 1 "register_operand" "%0")
+			 (match_operand:XF 2 "register_operand" "f")]))]
+  "TARGET_80387
+   && COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (if_then_else (match_operand:XF 3 "mult_operator" "")
+           (const_string "fmul")
+           (const_string "fop")))
+   (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_1_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+			[(match_operand:XF 1 "register_operand" "0,f")
+			 (match_operand:XF 2 "register_operand" "f,0")]))]
+  "TARGET_80387
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(float:XF
+	     (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+	   (match_operand:XF 2 "register_operand" "0,0")]))]
+  "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(match_operand:XF 1 "register_operand" "0,0")
+	   (float:XF
+	     (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	   [(float_extend:XF
+	      (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+	    (match_operand:XF 2 "register_operand" "0,f")]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_5_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(match_operand:XF 1 "register_operand" "0,f")
+	   (float_extend:XF
+	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_6_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(match_operator:XF 3 "binary_fp_operator"
+	  [(float_extend:XF
+	     (match_operand:MODEF 1 "register_operand" "0,f"))
+	   (float_extend:XF
+	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator" "")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator" "")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(match_operator 3 "binary_fp_operator"
+	   [(float (match_operand:X87MODEI12 1 "register_operand" ""))
+	    (match_operand 2 "register_operand" "")]))]
+  "reload_completed
+   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
+  [(const_int 0)]
+{
+  operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
+  operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			  gen_rtx_fmt_ee (GET_CODE (operands[3]),
+					  GET_MODE (operands[3]),
+					  operands[4],
+					  operands[2])));
+  ix86_free_from_memory (GET_MODE (operands[1]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(match_operator 3 "binary_fp_operator"
+	   [(match_operand 1 "register_operand" "")
+	    (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
+  "reload_completed
+   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
+  [(const_int 0)]
+{
+  operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+  operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			  gen_rtx_fmt_ee (GET_CODE (operands[3]),
+					  GET_MODE (operands[3]),
+					  operands[1],
+					  operands[4])));
+  ix86_free_from_memory (GET_MODE (operands[2]));
+  DONE;
+})
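+
+;; The two splits above spill the integer operand back to memory: the
+;; x87 fiadd/fisub/fimul/fidiv forms accept only memory operands, so a
+;; register input has to go through a stack slot first.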
+
+;; FPU special functions.
+
+;; This pattern implements a no-op XFmode truncation for
+;; all fancy i386 XFmode math functions.
+
+(define_insn "truncxf<mode>2_i387_noop_unspec"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+	(unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
+	UNSPEC_TRUNC_NOOP))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "sqrtxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fsqrt"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "sqrt_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(sqrt:XF
+	  (float_extend:XF
+	    (match_operand:MODEF 1 "register_operand" "0"))))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fsqrt"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*rsqrtsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+		   UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH"
+  "%vrsqrtss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+(define_expand "rsqrtsf2"
+  [(set (match_operand:SF 0 "register_operand" "")
+	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")]
+		   UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH"
+{
+  ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
+  DONE;
+})
+
+(define_insn "*sqrt<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(sqrt:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "*")
+   (set_attr "amdfam10_decode" "*")])
+
+(define_expand "sqrt<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+	(sqrt:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_FANCY_MATH_387
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  if (<MODE>mode == SFmode
+      && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
+      DONE;
+    }
+
+  if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+      emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+      DONE;
+   }
+})
+
+(define_insn "fpremxf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FPREM_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FPREM_U))
+   (set (reg:CCFP FPSR_REG)
+	(unspec:CCFP [(match_dup 2) (match_dup 3)]
+		     UNSPEC_C2_FLAG))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fprem"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
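+;; fprem computes only a partial remainder and keeps the C2 status flag
+;; set while the reduction is incomplete, so the fmod expanders below
+;; loop on fpremxf4_i387 (via ix86_emit_fp_unordered_jump) until C2
+;; clears.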
+(define_expand "fmodxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "general_operand" ""))
+   (use (match_operand:XF 2 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
+
+  emit_label (label);
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_expand "fmod<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  emit_label (label);
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+  else
+    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
+  DONE;
+})
+
+(define_insn "fprem1xf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FPREM1_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FPREM1_U))
+   (set (reg:CCFP FPSR_REG)
+	(unspec:CCFP [(match_dup 2) (match_dup 3)]
+		     UNSPEC_C2_FLAG))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fprem1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "remainderxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "general_operand" ""))
+   (use (match_operand:XF 2 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
+
+  emit_label (label);
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_expand "remainder<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  emit_label (label);
+
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+  else
+    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
+  DONE;
+})
+
+(define_insn "*sinxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*sin_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cosxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cos_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; When the sincos pattern is defined, the sin and cos builtin functions
+;; are expanded to the sincos pattern with one of the outputs left
+;; unused.  The CSE pass will figure out whether two sincos patterns can
+;; be combined; otherwise a sincos pattern is split back into a sin or
+;; cos pattern, depending on which output is unused.
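+;;
+;; E.g. in the illustrative C below both calls read the same input, so
+;; the two sincos patterns combine into a single fsincos:
+;;
+;;   s = sin (x);  c = cos (x);   /* one fsincos, both outputs used */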
+
+(define_insn "sincosxf3"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsincos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_split
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "")]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "")
+	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+   && !(reload_completed || reload_in_progress)"
+  [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]
+  "")
+
+(define_split
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "")]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "")
+	(unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+   && !(reload_completed || reload_in_progress)"
+  [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
+  "")
+
+(define_insn "sincos_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "0"))]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsincos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_split
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" ""))]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "")
+	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+   && !(reload_completed || reload_in_progress)"
+  [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]
+  "")
+
+(define_split
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" ""))]
+		   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "")
+	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+   && !(reload_completed || reload_in_progress)"
+  [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]
+  "")
+
+(define_expand "sincos<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1));
+  DONE;
+})
+
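+;; fptan pushes tan(op2) and then 1.0 onto the register stack; operand 0
+;; models the pushed 1.0 as a load of the constant
+;; (standard_80387_constant_p == 2, i.e. fld1), which the tan expanders
+;; below simply discard.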
+(define_insn "fptanxf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(match_operand:XF 3 "const_double_operand" "F"))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+		   UNSPEC_TAN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && standard_80387_constant_p (operands[3]) == 2"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fptan_extend<mode>xf4_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+	(match_operand:MODEF 3 "const_double_operand" "F"))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "0"))]
+		   UNSPEC_TAN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations
+   && standard_80387_constant_p (operands[3]) == 2"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "tanxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx one = gen_reg_rtx (XFmode);
+  rtx op2 = CONST1_RTX (XFmode); /* fld1 */
+
+  emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "tan<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx one = gen_reg_rtx (<MODE>mode);
+  rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
+
+  emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
+					     operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "*fpatanxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+	            (match_operand:XF 2 "register_operand" "u")]
+	           UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fpatan_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "u"))]
+	           UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "atan2xf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 2 "register_operand" "")
+			       (match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "atan2<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "atanxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 2)
+			       (match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "atan<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (<MODE>mode);
+  emit_move_insn (op2, CONST1_RTX (<MODE>mode));  /* fld1 */
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
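+;; asin is computed via the identity asin(x) = atan(x / sqrt(1 - x*x)),
+;; expanded into a multiply, a subtract from 1.0, fsqrt and fpatan.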
+(define_expand "asinxf2"
+  [(set (match_dup 2)
+	(mult:XF (match_operand:XF 1 "register_operand" "")
+		 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+        	   (unspec:XF [(match_dup 5) (match_dup 1)]
+			      UNSPEC_FPATAN))
+   	      (clobber (match_scratch:XF 6 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "asin<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_asinxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
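+;; acos is computed via the identity acos(x) = atan(sqrt(1 - x*x) / x),
+;; the same sequence as asin with the fpatan operands swapped.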
+(define_expand "acosxf2"
+  [(set (match_dup 2)
+	(mult:XF (match_operand:XF 1 "register_operand" "")
+		 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+        	   (unspec:XF [(match_dup 1) (match_dup 5)]
+			      UNSPEC_FPATAN))
+   	      (clobber (match_scratch:XF 6 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "acos<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_acosxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "fyl2xxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+		    (match_operand:XF 2 "register_operand" "u")]
+	           UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fyl2x_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (match_operand:XF 2 "register_operand" "u")]
+	           UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
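+;; fyl2x computes op2 * log2(op1), so the expanders below compute
+;;   log(x)   as ln(2)    * log2(x)  (fldln2),
+;;   log10(x) as log10(2) * log2(x)  (fldlg2), and
+;;   log2(x)  as 1.0      * log2(x)  (fld1).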
+(define_expand "logxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+})
+
+(define_expand "log<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "log10xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+})
+
+(define_expand "log10<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "log2xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "log2<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
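+;; fyl2xp1 computes op2 * log2(op1 + 1.0), which can be more accurate
+;; than fyl2x on 1.0 + op1 when op1 is near zero; the log1p expanders
+;; below go through ix86_emit_i387_log1p, which can emit it.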
+(define_insn "fyl2xp1xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+		    (match_operand:XF 2 "register_operand" "u")]
+	           UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fyl2xp1_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (match_operand:XF 2 "register_operand" "u")]
+	           UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "log1pxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  ix86_emit_i387_log1p (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "log1p<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+
+  operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
+
+  ix86_emit_i387_log1p (op0, operands[1]);
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
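+;; fxtract splits its operand into significand (st(0)) and exponent
+;; (st(1)); logb below returns the exponent as a float, while ilogb
+;; converts it to an integer with fist.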
+(define_insn "fxtractxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+		   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fxtract_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "0"))]
+		   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "logbxf2"
+  [(parallel [(set (match_dup 2)
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_XTRACT_FRACT))
+	      (set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logb<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "ilogbxf2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+  DONE;
+})
+
+(define_expand "ilogb<mode>2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+  DONE;
+})
+
+(define_insn "*f2xm1xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_F2XM1))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "f2xm1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FSCALE_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FSCALE_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fscale"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
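+;; expNcorexf3 computes N^x as 2^(x * log2(N)): the product op1*op2 is
+;; split by frndint into an integer part I and a fraction F, f2xm1
+;; yields 2^F - 1, the fld1 constant restores the 1, and fscale
+;; multiplies the sum by 2^I.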
+(define_expand "expNcorexf3"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_operand:XF 2 "register_operand" "")))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 8) (match_dup 4)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 9)
+		   (unspec:XF [(match_dup 8) (match_dup 4)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 3; i < 10; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[7], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "expxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "exp10xf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp10<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_exp10xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "exp2xf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, CONST1_RTX (XFmode));  /* fld1 */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp2<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_exp2xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
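+;; expm1 is expanded as 2^I * (2^F - 1) + (2^I - 1), where
+;; x * log2(e) = I + F; keeping the f2xm1 result separate avoids the
+;; catastrophic cancellation that exp(x) - 1 would suffer for small x.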
+(define_expand "expm1xf2"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 9) (float_extend:XF (match_dup 13)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 7)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 8)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 10)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 11)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_EXP))])
+   (set (match_dup 12) (minus:XF (match_dup 10)
+				 (float_extend:XF (match_dup 13))))
+   (set (match_operand:XF 0 "register_operand" "")
+	(plus:XF (match_dup 12) (match_dup 7)))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 13; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  operands[13]
+    = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */
+
+  emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+})
+
+(define_expand "expm1<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expm1xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
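+;; ldexp(x, n) is x * 2^n: the SI exponent is converted to XFmode with
+;; fild, and fscale then scales operand 1 by 2^trunc(st(1)).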
+(define_expand "ldexpxf3"
+  [(set (match_dup 3)
+	(float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 3)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 4)
+		   (unspec:XF [(match_dup 1) (match_dup 3)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+  operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexp<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "scalbxf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_operand:XF 2 "register_operand" "")]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 3)
+		   (unspec:XF [(match_dup 1) (match_dup 2)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "scalb<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1, op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+  op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_scalbxf3 (op0, op1, op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+
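+;; The ROUNDS[SD] immediate selects the rounding mode: bits 1:0 give the
+;; direction (00 nearest, 01 down, 10 up, 11 truncate) and bit 2 (0x04)
+;; uses the current MXCSR rounding mode instead; hence 0x04 for rint,
+;; 0x01 for floor and 0x02 for ceil in the expanders below.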
+(define_insn "sse4_1_round<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
+		       (match_operand:SI 2 "const_0_to_15_operand" "n")]
+		      UNSPEC_ROUND))]
+  "TARGET_ROUND"
+  "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "rintxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_FRNDINT))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "frndint"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "rint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+	FAIL;
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x04)));
+      else
+	ix86_expand_rint (operand0, operand1);
+    }
+  else
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_rintxf2 (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_expand "round<mode>2"
+  [(match_operand:MODEF 0 "register_operand" "")
+   (match_operand:MODEF 1 "nonimmediate_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math && !flag_rounding_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  if (TARGET_64BIT || (<MODE>mode != DFmode))
+    ix86_expand_round (operand0, operand1);
+  else
+    ix86_expand_rounddf_32 (operand0, operand1);
+  DONE;
+})
+
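+;; fist/fistp can only store to memory, so when the destination is a
+;; register the *_with_temp variants store to a stack slot first and the
+;; post-reload splitters below copy the value out.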
+(define_insn_and_split "*fistdi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fistdi2 (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+      emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+					 operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+		   UNSPEC_FIST))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+	      (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+	      (clobber (match_dup 3))])]
+  "")
+
+(define_insn_and_split "*fist<mode>2_1"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+  emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+					operands[2]));
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]
+  "")
+
+(define_expand "lrintxf<mode>2"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+     (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+		      UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387"
+  "")
+
+(define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
+  [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+     (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
+			UNSPEC_FIX_NOTRUNC))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)"
+  "")
+
+(define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
+  [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
+   && !flag_trapping_math && !flag_rounding_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  ix86_expand_lround (operand0, operand1);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
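+;; The floor and ceil variants store the live control word to operand 2
+;; and load one with the rounding field redirected from operand 3
+;; (fldcw %3; frndint/fist; fldcw %2); the i387 mode-switching pass
+;; computes the control word values for both slots.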
+(define_insn_and_split "frndintxf2_floor"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FRNDINT_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+  emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+					operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_floor_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "XF")])
+
+(define_expand "floorxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "floor<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+    {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+	FAIL;
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x01)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+	ix86_expand_floorceil (operand0, operand1, true);
+      else
+	ix86_expand_floorceildf_32 (operand0, operand1, true);
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+	FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_floor (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_floor_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+				      operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+						  operands[2], operands[3],
+						  operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_floor"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_floor_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fist<mode>2_floor"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_floor_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+				  UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+				  UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])]
+  "")
+
+(define_expand "lfloorxf<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+		    UNSPEC_FIST_FLOOR))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "lfloor<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  if (optimize_insn_for_size_p () && TARGET_64BIT)
+    FAIL;
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_ceil"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FRNDINT_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+  emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+				       operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "ceil<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+    {
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x02)));
+      else if (optimize_insn_for_size_p ())
+	FAIL;
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+	ix86_expand_floorceil (operand0, operand1, false);
+      else
+	ix86_expand_floorceildf_32 (operand0, operand1, false);
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+	FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+				     operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+						 operands[2], operands[3],
+						 operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fist<mode>2_ceil"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+				  UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+				  UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])]
+  "")
+
+(define_expand "lceilxf<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+		    UNSPEC_FIST_CEIL))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "lceil<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+(define_expand "lceil<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_trunc"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FRNDINT_TRUNC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+
+  emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1],
+					operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_trunc_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_TRUNC))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "XF")])
+
+(define_expand "btruncxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "btrunc<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+    {
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x03)));
+      else if (optimize_insn_for_size_p ())
+	FAIL;
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+	ix86_expand_trunc (operand0, operand1);
+      else
+	ix86_expand_truncdf_32 (operand0, operand1);
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+	FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_trunc (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_mask_pm"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+	 UNSPEC_FRNDINT_MASK_PM))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+					  operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_mask_pm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_MASK_PM))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
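+
+;; The control word loaded from %3 masks the precision (inexact)
+;; exception, and the trailing `fclex' clears whatever exception flags
+;; `frndint' raised before the original control word is restored:
+;; nearbyint must not raise the inexact exception (C99).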
+
+(define_expand "nearbyintxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+
+  DONE;
+})
+
+(define_expand "nearbyint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "fxam<mode>2_i387"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:X87MODEF 1 "register_operand" "f")]
+	  UNSPEC_FXAM))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fxam\n\tfnstsw\t%0"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "fxam<mode>2_i387_with_temp"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(unspec:HI
+	  [(match_operand:MODEF 1 "memory_operand" "")]
+	  UNSPEC_FXAM_MEM))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0)
+	(unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
+{
+  operands[2] = gen_reg_rtx (<MODE>mode);
+
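+  /* Mark the memory input volatile so the store/load pair is not
+     optimized away; the round trip through memory is what strips the
+     excess x87 precision (see the isinf expanders below).  */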
+  MEM_VOLATILE_P (operands[1]) = 1;
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "isinfxf2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && TARGET_C99_FUNCTIONS"
+{
+  rtx mask = GEN_INT (0x45);
+  rtx val = GEN_INT (0x05);
+
+  rtx cond;
+
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx res = gen_reg_rtx (QImode);
+
+  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+
+  emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+  emit_insn (gen_cmpqi_ext_3 (scratch, val));
+  cond = gen_rtx_fmt_ee (EQ, QImode,
+			 gen_rtx_REG (CCmode, FLAGS_REG),
+			 const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+  emit_insn (gen_zero_extendqisi2 (operands[0], res));
+  DONE;
+})
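+
+;; `fxam' classifies st(0) into the C3/C2/C0 condition bits (bits 14, 10
+;; and 8 of the status word), which `fnstsw' leaves in bits 6, 2 and 0 of
+;; %ah.  The 0x45 mask keeps exactly those bits, and 0x05 (C3=0, C2=1,
+;; C0=1) is the class code for infinity.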
+
+(define_expand "isinf<mode>2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "nonimmediate_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && TARGET_C99_FUNCTIONS
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  rtx mask = GEN_INT (0x45);
+  rtx val = GEN_INT (0x05);
+
+  rtx cond;
+
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx res = gen_reg_rtx (QImode);
+
+  /* Remove excess precision by forcing the value through memory.  */
+  if (memory_operand (operands[1], VOIDmode))
+    emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      rtx temp = assign_386_stack_local (<MODE>mode, slot);
+
+      emit_move_insn (temp, operands[1]);
+      emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
+    }
+
+  emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+  emit_insn (gen_cmpqi_ext_3 (scratch, val));
+  cond = gen_rtx_fmt_ee (EQ, QImode,
+			 gen_rtx_REG (CCmode, FLAGS_REG),
+			 const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+  emit_insn (gen_zero_extendqisi2 (operands[0], res));
+  DONE;
+})
+
+(define_expand "signbit<mode>2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:X87MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  rtx mask = GEN_INT (0x0200);
+
+  rtx scratch = gen_reg_rtx (HImode);
+
+  emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1]));
+  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask));
+  DONE;
+})
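+
+;; For signbit only C1 matters: `fxam' copies the sign of the examined
+;; operand into C1, which is bit 9 of the status word -- hence the
+;; 0x0200 mask.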
+
+;; Block operation instructions
+
+(define_insn "cld"
+  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+  ""
+  "cld"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
+(define_expand "movmemsi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:BLK 1 "memory_operand" ""))
+   (use (match_operand:SI 2 "nonmemory_operand" ""))
+   (use (match_operand:SI 3 "const_int_operand" ""))
+   (use (match_operand:SI 4 "const_int_operand" ""))
+   (use (match_operand:SI 5 "const_int_operand" ""))]
+  ""
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
+			 operands[4], operands[5]))
+   DONE;
+ else
+   FAIL;
+})
+
+(define_expand "movmemdi"
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:BLK 1 "memory_operand" ""))
+   (use (match_operand:DI 2 "nonmemory_operand" ""))
+   (use (match_operand:DI 3 "const_int_operand" ""))
+   (use (match_operand:SI 4 "const_int_operand" ""))
+   (use (match_operand:SI 5 "const_int_operand" ""))]
+  "TARGET_64BIT"
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
+			 operands[4], operands[5]))
+   DONE;
+ else
+   FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the preceding expanders.
+
+(define_expand "strmov"
+  [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+   (set (match_operand 1 "memory_operand" "") (match_dup 4))
+   (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+  /* If .md ever supports :P for Pmode, these can be directly
+     in the pattern above.  */
+  operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+  operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+  /* Can't use this if the user has appropriated esi or edi.  */
+  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+      && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+    {
+      emit_insn (gen_strmov_singleop (operands[0], operands[1],
+				      operands[2], operands[3],
+				      operands[5], operands[6]));
+      DONE;
+    }
+
+  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+(define_expand "strmov_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+		   (match_operand 3 "memory_operand" ""))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 4 "" ""))
+	      (set (match_operand 2 "register_operand" "")
+		   (match_operand 5 "" ""))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strmovdi_rex_1"
+  [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+	(mem:DI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 2)
+		 (const_int 8)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+	(plus:DI (match_dup 3)
+		 (const_int 8)))]
+  "TARGET_64BIT"
+  "movsq"
+  [(set_attr "type" "str")
+   (set_attr "mode" "DI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_1"
+  [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+	(mem:SI (match_operand:SI 3 "register_operand" "1")))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 2)
+		 (const_int 4)))
+   (set (match_operand:SI 1 "register_operand" "=S")
+	(plus:SI (match_dup 3)
+		 (const_int 4)))]
+  "!TARGET_64BIT"
+  "movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "mode" "SI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_rex_1"
+  [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
+	(mem:SI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 2)
+		 (const_int 4)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+	(plus:DI (match_dup 3)
+		 (const_int 4)))]
+  "TARGET_64BIT"
+  "movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "mode" "SI")
+   (set_attr "memory" "both")])
+
+(define_insn "*strmovhi_1"
+  [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
+	(mem:HI (match_operand:SI 3 "register_operand" "1")))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 2)
+		 (const_int 2)))
+   (set (match_operand:SI 1 "register_operand" "=S")
+	(plus:SI (match_dup 3)
+		 (const_int 2)))]
+  "!TARGET_64BIT"
+  "movsw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strmovhi_rex_1"
+  [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
+	(mem:HI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 2)
+		 (const_int 2)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+	(plus:DI (match_dup 3)
+		 (const_int 2)))]
+  "TARGET_64BIT"
+  "movsw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strmovqi_1"
+  [(set (mem:QI (match_operand:SI 2 "register_operand" "0"))
+	(mem:QI (match_operand:SI 3 "register_operand" "1")))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 2)
+		 (const_int 1)))
+   (set (match_operand:SI 1 "register_operand" "=S")
+	(plus:SI (match_dup 3)
+		 (const_int 1)))]
+  "!TARGET_64BIT"
+  "movsb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "QI")])
+
+(define_insn "*strmovqi_rex_1"
+  [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
+	(mem:QI (match_operand:DI 3 "register_operand" "1")))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 2)
+		 (const_int 1)))
+   (set (match_operand:DI 1 "register_operand" "=S")
+	(plus:DI (match_dup 3)
+		 (const_int 1)))]
+  "TARGET_64BIT"
+  "movsb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "QI")])
+
+(define_expand "rep_mov"
+  [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 5 "" ""))
+	      (set (match_operand 2 "register_operand" "")
+		   (match_operand 6 "" ""))
+	      (set (match_operand 1 "memory_operand" "")
+		   (match_operand 3 "memory_operand" ""))
+	      (use (match_dup 4))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*rep_movdi_rex64"
+  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+			    (const_int 3))
+		 (match_operand:DI 3 "register_operand" "0")))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+		 (match_operand:DI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+	(mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "TARGET_64BIT"
+  "rep movsq"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "DI")])
+
+(define_insn "*rep_movsi"
+  [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2")
+			    (const_int 2))
+		 (match_operand:SI 3 "register_operand" "0")))
+   (set (match_operand:SI 1 "register_operand" "=S")
+        (plus:SI (ashift:SI (match_dup 5) (const_int 2))
+		 (match_operand:SI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+	(mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "!TARGET_64BIT"
+  "rep movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_movsi_rex64"
+  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+			    (const_int 2))
+		 (match_operand:DI 3 "register_operand" "0")))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (ashift:DI (match_dup 5) (const_int 2))
+		 (match_operand:DI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+	(mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "TARGET_64BIT"
+  "rep movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi"
+  [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_operand:SI 3 "register_operand" "0")
+		 (match_operand:SI 5 "register_operand" "2")))
+   (set (match_operand:SI 1 "register_operand" "=S")
+        (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
+   (set (mem:BLK (match_dup 3))
+	(mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "!TARGET_64BIT"
+  "rep movsb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi_rex64"
+  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_operand:DI 3 "register_operand" "0")
+		 (match_operand:DI 5 "register_operand" "2")))
+   (set (match_operand:DI 1 "register_operand" "=S")
+        (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
+   (set (mem:BLK (match_dup 3))
+	(mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "TARGET_64BIT"
+  "rep movsb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+(define_expand "setmemsi"
+   [(use (match_operand:BLK 0 "memory_operand" ""))
+    (use (match_operand:SI 1 "nonmemory_operand" ""))
+    (use (match_operand 2 "const_int_operand" ""))
+    (use (match_operand 3 "const_int_operand" ""))
+    (use (match_operand:SI 4 "const_int_operand" ""))
+    (use (match_operand:SI 5 "const_int_operand" ""))]
+  ""
+{
+ if (ix86_expand_setmem (operands[0], operands[1],
+			 operands[2], operands[3],
+			 operands[4], operands[5]))
+   DONE;
+ else
+   FAIL;
+})
+
+(define_expand "setmemdi"
+   [(use (match_operand:BLK 0 "memory_operand" ""))
+    (use (match_operand:DI 1 "nonmemory_operand" ""))
+    (use (match_operand 2 "const_int_operand" ""))
+    (use (match_operand 3 "const_int_operand" ""))
+    (use (match_operand 4 "const_int_operand" ""))
+    (use (match_operand 5 "const_int_operand" ""))]
+  "TARGET_64BIT"
+{
+ if (ix86_expand_setmem (operands[0], operands[1],
+			 operands[2], operands[3],
+			 operands[4], operands[5]))
+   DONE;
+ else
+   FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the preceding expanders.
+
+(define_expand "strset"
+  [(set (match_operand 1 "memory_operand" "")
+	(match_operand 2 "register_operand" ""))
+   (parallel [(set (match_operand 0 "register_operand" "")
+		   (match_dup 3))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+  /* If .md ever supports :P for Pmode, this can be directly
+     in the pattern above.  */
+  operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+			      GEN_INT (GET_MODE_SIZE (GET_MODE
+						      (operands[2]))));
+  if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+    {
+      emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+				      operands[3]));
+      DONE;
+    }
+})
+
+(define_expand "strset_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+		   (match_operand 2 "register_operand" ""))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 3 "" ""))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strsetdi_rex_1"
+  [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+	(match_operand:DI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 1)
+		 (const_int 8)))]
+  "TARGET_64BIT"
+  "stosq"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+(define_insn "*strsetsi_1"
+  [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+	(match_operand:SI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 1)
+		 (const_int 4)))]
+  "!TARGET_64BIT"
+  "stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*strsetsi_rex_1"
+  [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+	(match_operand:SI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 1)
+		 (const_int 4)))]
+  "TARGET_64BIT"
+  "stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*strsethi_1"
+  [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+	(match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 1)
+		 (const_int 2)))]
+  "!TARGET_64BIT"
+  "stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strsethi_rex_1"
+  [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
+	(match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 1)
+		 (const_int 2)))]
+  "TARGET_64BIT"
+  "stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+(define_insn "*strsetqi_1"
+  [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+	(match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 1)
+		 (const_int 1)))]
+  "!TARGET_64BIT"
+  "stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_insn "*strsetqi_rex_1"
+  [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+	(match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:DI 0 "register_operand" "=D")
+	(plus:DI (match_dup 1)
+		 (const_int 1)))]
+  "TARGET_64BIT"
+  "stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_expand "rep_stos"
+  [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 4 "" ""))
+	      (set (match_operand 2 "memory_operand" "") (const_int 0))
+	      (use (match_operand 3 "register_operand" ""))
+	      (use (match_dup 1))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*rep_stosdi_rex64"
+  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+			    (const_int 3))
+		 (match_operand:DI 3 "register_operand" "0")))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (match_operand:DI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "TARGET_64BIT"
+  "rep stosq"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+(define_insn "*rep_stossi"
+  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1")
+			    (const_int 2))
+		 (match_operand:SI 3 "register_operand" "0")))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (match_operand:SI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "!TARGET_64BIT"
+  "rep stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_stossi_rex64"
+  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+			    (const_int 2))
+		 (match_operand:DI 3 "register_operand" "0")))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (match_operand:SI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "TARGET_64BIT"
+  "rep stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+(define_insn "*rep_stosqi"
+  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:SI 0 "register_operand" "=D")
+        (plus:SI (match_operand:SI 3 "register_operand" "0")
+		 (match_operand:SI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (match_operand:QI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "!TARGET_64BIT"
+  "rep stosb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_insn "*rep_stosqi_rex64"
+  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:DI 0 "register_operand" "=D")
+        (plus:DI (match_operand:DI 3 "register_operand" "0")
+		 (match_operand:DI 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (match_operand:QI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "TARGET_64BIT"
+  "rep stosb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "QI")])
+
+(define_expand "cmpstrnsi"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(compare:SI (match_operand:BLK 1 "general_operand" "")
+		    (match_operand:BLK 2 "general_operand" "")))
+   (use (match_operand 3 "general_operand" ""))
+   (use (match_operand 4 "immediate_operand" ""))]
+  ""
+{
+  rtx addr1, addr2, out, outlow, count, countreg, align;
+
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    FAIL;
+
+  /* Can't use this if the user has appropriated esi or edi.  */
+  if (fixed_regs[SI_REG] || fixed_regs[DI_REG])
+    FAIL;
+
+  out = operands[0];
+  if (!REG_P (out))
+    out = gen_reg_rtx (SImode);
+
+  addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+  addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
+  if (addr1 != XEXP (operands[1], 0))
+    operands[1] = replace_equiv_address_nv (operands[1], addr1);
+  if (addr2 != XEXP (operands[2], 0))
+    operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
+  count = operands[3];
+  countreg = ix86_zero_extend_to_Pmode (count);
+
+  /* %%% Iff we are testing strict equality, we can use known alignment
+     to good advantage.  This may be possible with combine, particularly
+     once cc0 is dead.  */
+  align = operands[4];
+
+  if (CONST_INT_P (count))
+    {
+      if (INTVAL (count) == 0)
+	{
+	  emit_move_insn (operands[0], const0_rtx);
+	  DONE;
+	}
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+				     operands[1], operands[2]));
+    }
+  else
+    {
+      if (TARGET_64BIT)
+	emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
+      else
+	emit_insn (gen_cmpsi_1 (countreg, countreg));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+				  operands[1], operands[2]));
+    }
+
+  outlow = gen_lowpart (QImode, out);
+  emit_insn (gen_cmpintqi (outlow));
+  emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
+
+  if (operands[0] != out)
+    emit_move_insn (operands[0], out);
+
+  DONE;
+})
+
+;; Produce a tri-state integer (-1, 0, 1) from condition codes.
+
+(define_expand "cmpintqi"
+  [(set (match_dup 1)
+	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_dup 2)
+	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (minus:QI (match_dup 1)
+			     (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "operands[1] = gen_reg_rtx (QImode);
+   operands[2] = gen_reg_rtx (QImode);")
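+
+;; A sketch of the resulting code (register choice is up to the
+;; allocator):
+;;
+;;   seta   %al          ; 1 if above (op1 > op2)
+;;   setb   %dl          ; 1 if below (op1 < op2)
+;;   subb   %dl, %al     ; -1, 0 or 1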
+
+;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
+;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
+
+(define_expand "cmpstrnqi_nz_1"
+  [(parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC (match_operand 4 "memory_operand" "")
+			       (match_operand 5 "memory_operand" "")))
+	      (use (match_operand 2 "register_operand" ""))
+	      (use (match_operand:SI 3 "immediate_operand" ""))
+	      (clobber (match_operand 0 "register_operand" ""))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (match_dup 2))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_nz_1"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+		    (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
+   (use (match_operand:SI 6 "register_operand" "2"))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (clobber (match_operand:SI 0 "register_operand" "=S"))
+   (clobber (match_operand:SI 1 "register_operand" "=D"))
+   (clobber (match_operand:SI 2 "register_operand" "=c"))]
+  "!TARGET_64BIT"
+  "repz cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_nz_rex_1"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+		    (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
+   (use (match_operand:DI 6 "register_operand" "2"))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (clobber (match_operand:DI 0 "register_operand" "=S"))
+   (clobber (match_operand:DI 1 "register_operand" "=D"))
+   (clobber (match_operand:DI 2 "register_operand" "=c"))]
+  "TARGET_64BIT"
+  "repz cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+;; The same, but the count is not known to be nonzero.
+
+(define_expand "cmpstrnqi_1"
+  [(parallel [(set (reg:CC FLAGS_REG)
+		(if_then_else:CC (ne (match_operand 2 "register_operand" "")
+				     (const_int 0))
+		  (compare:CC (match_operand 4 "memory_operand" "")
+			      (match_operand 5 "memory_operand" ""))
+		  (const_int 0)))
+	      (use (match_operand:SI 3 "immediate_operand" ""))
+	      (use (reg:CC FLAGS_REG))
+	      (clobber (match_operand 0 "register_operand" ""))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (match_dup 2))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_1"
+  [(set (reg:CC FLAGS_REG)
+	(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
+			     (const_int 0))
+	  (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+		      (mem:BLK (match_operand:SI 5 "register_operand" "1")))
+	  (const_int 0)))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (use (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 0 "register_operand" "=S"))
+   (clobber (match_operand:SI 1 "register_operand" "=D"))
+   (clobber (match_operand:SI 2 "register_operand" "=c"))]
+  "!TARGET_64BIT"
+  "repz cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_rex_1"
+  [(set (reg:CC FLAGS_REG)
+	(if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
+			     (const_int 0))
+	  (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+		      (mem:BLK (match_operand:DI 5 "register_operand" "1")))
+	  (const_int 0)))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (use (reg:CC FLAGS_REG))
+   (clobber (match_operand:DI 0 "register_operand" "=S"))
+   (clobber (match_operand:DI 1 "register_operand" "=D"))
+   (clobber (match_operand:DI 2 "register_operand" "=c"))]
+  "TARGET_64BIT"
+  "repz cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+(define_expand "strlensi"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec:SI [(match_operand:BLK 1 "general_operand" "")
+		    (match_operand:QI 2 "immediate_operand" "")
+		    (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+  ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+   DONE;
+ else
+   FAIL;
+})
+
+(define_expand "strlendi"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:BLK 1 "general_operand" "")
+		    (match_operand:QI 2 "immediate_operand" "")
+		    (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+  ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+   DONE;
+ else
+   FAIL;
+})
+
+(define_expand "strlenqi_1"
+  [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strlenqi_1"
+  [(set (match_operand:SI 0 "register_operand" "=&c")
+	(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
+		    (match_operand:QI 2 "register_operand" "a")
+		    (match_operand:SI 3 "immediate_operand" "i")
+		    (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
+   (clobber (match_operand:SI 1 "register_operand" "=D"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "repnz scasb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+(define_insn "*strlenqi_rex_1"
+  [(set (match_operand:DI 0 "register_operand" "=&c")
+	(unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
+		    (match_operand:QI 2 "register_operand" "a")
+		    (match_operand:DI 3 "immediate_operand" "i")
+		    (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
+   (clobber (match_operand:DI 1 "register_operand" "=D"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "repnz scasb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set_attr "prefix_rep" "1")])
+
+;; Peephole optimizations to clean up after cmpstrn*.  This should be
+;; handled in combine, but it is not currently up to the task.
+;; When used for their truth value, the cmpstrn* expanders generate
+;; code like this:
+;;
+;;   repz cmpsb
+;;   seta 	%al
+;;   setb 	%dl
+;;   cmpb 	%al, %dl
+;;   jcc	label
+;;
+;; The three intermediate instructions are unnecessary.
+
+;; This one handles cmpstrn*_nz_1...
+(define_peephole2
+  [(parallel [
+     (set (reg:CC FLAGS_REG)
+	  (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+		      (mem:BLK (match_operand 5 "register_operand" ""))))
+     (use (match_operand 6 "register_operand" ""))
+     (use (match_operand:SI 3 "immediate_operand" ""))
+     (clobber (match_operand 0 "register_operand" ""))
+     (clobber (match_operand 1 "register_operand" ""))
+     (clobber (match_operand 2 "register_operand" ""))])
+   (set (match_operand:QI 7 "register_operand" "")
+	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_operand:QI 8 "register_operand" "")
+	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (reg FLAGS_REG)
+	(compare (match_dup 7) (match_dup 8)))
+  ]
+  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+  [(parallel [
+     (set (reg:CC FLAGS_REG)
+	  (compare:CC (mem:BLK (match_dup 4))
+		      (mem:BLK (match_dup 5))))
+     (use (match_dup 6))
+     (use (match_dup 3))
+     (clobber (match_dup 0))
+     (clobber (match_dup 1))
+     (clobber (match_dup 2))])]
+  "")
+
+;; ...and this one handles cmpstrn*_1.
+(define_peephole2
+  [(parallel [
+     (set (reg:CC FLAGS_REG)
+	  (if_then_else:CC (ne (match_operand 6 "register_operand" "")
+			       (const_int 0))
+	    (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+		        (mem:BLK (match_operand 5 "register_operand" "")))
+	    (const_int 0)))
+     (use (match_operand:SI 3 "immediate_operand" ""))
+     (use (reg:CC FLAGS_REG))
+     (clobber (match_operand 0 "register_operand" ""))
+     (clobber (match_operand 1 "register_operand" ""))
+     (clobber (match_operand 2 "register_operand" ""))])
+   (set (match_operand:QI 7 "register_operand" "")
+	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_operand:QI 8 "register_operand" "")
+	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (reg FLAGS_REG)
+	(compare (match_dup 7) (match_dup 8)))
+  ]
+  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+  [(parallel [
+     (set (reg:CC FLAGS_REG)
+	  (if_then_else:CC (ne (match_dup 6)
+			       (const_int 0))
+	    (compare:CC (mem:BLK (match_dup 4))
+			(mem:BLK (match_dup 5)))
+	    (const_int 0)))
+     (use (match_dup 3))
+     (use (reg:CC FLAGS_REG))
+     (clobber (match_dup 0))
+     (clobber (match_dup 1))
+     (clobber (match_dup 2))])]
+  "")
+
+
+
+;; Conditional move instructions.
+
+(define_expand "movdicc"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(if_then_else:DI (match_operand 1 "comparison_operator" "")
+			 (match_operand:DI 2 "general_operand" "")
+			 (match_operand:DI 3 "general_operand" "")))]
+  "TARGET_64BIT"
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "x86_movdicc_0_m1_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "")
+	  (const_int -1)
+	  (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "sbb{q}\t%0, %0"
+  ; Since we don't have the proper number of operands for an alu insn,
+  ; fill in all the blanks.
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_movdicc_0_m1_se"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "")
+			 (const_int 1)
+			 (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{q}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*movdicc_c_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(if_then_else:DI (match_operator 1 "ix86_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:DI 2 "nonimmediate_operand" "rm,0")
+		      (match_operand:DI 3 "nonimmediate_operand" "0,rm")))]
+  "TARGET_64BIT && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "DI")])
+
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "general_operand" "")
+			 (match_operand:SI 3 "general_operand" "")))]
+  ""
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
+
+(define_insn "x86_movsicc_0_m1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "")
+	  (const_int -1)
+	  (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{l}\t%0, %0"
+  ; Since we don't have the proper number of operands for an alu insn,
+  ; fill in all the blanks.
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_movsicc_0_m1_se"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "")
+			 (const_int 1)
+			 (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{l}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
+(define_insn "*movsicc_noc"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(if_then_else:SI (match_operator 1 "ix86_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:SI 2 "nonimmediate_operand" "rm,0")
+		      (match_operand:SI 3 "nonimmediate_operand" "0,rm")))]
+  "TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "SI")])
+
+(define_expand "movhicc"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(if_then_else:HI (match_operand 1 "comparison_operator" "")
+			 (match_operand:HI 2 "general_operand" "")
+			 (match_operand:HI 3 "general_operand" "")))]
+  "TARGET_HIMODE_MATH"
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "*movhicc_noc"
+  [(set (match_operand:HI 0 "register_operand" "=r,r")
+	(if_then_else:HI (match_operator 1 "ix86_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:HI 2 "nonimmediate_operand" "rm,0")
+		      (match_operand:HI 3 "nonimmediate_operand" "0,rm")))]
+  "TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "HI")])
+
+(define_expand "movqicc"
+  [(set (match_operand:QI 0 "register_operand" "")
+	(if_then_else:QI (match_operand 1 "comparison_operator" "")
+			 (match_operand:QI 2 "general_operand" "")
+			 (match_operand:QI 3 "general_operand" "")))]
+  "TARGET_QIMODE_MATH"
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn_and_split "*movqicc_noc"
+  [(set (match_operand:QI 0 "register_operand" "=r,r")
+	(if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+				[(match_operand 4 "flags_reg_operand" "")
+				 (const_int 0)])
+		      (match_operand:QI 2 "register_operand" "r,0")
+		      (match_operand:QI 3 "register_operand" "0,r")))]
+  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+		      (match_dup 2)
+		      (match_dup 3)))]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[2] = gen_lowpart (SImode, operands[2]);
+   operands[3] = gen_lowpart (SImode, operands[3]);"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "SI")])
+
+(define_expand "mov<mode>cc"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(if_then_else:X87MODEF
+	  (match_operand 1 "comparison_operator" "")
+	  (match_operand:X87MODEF 2 "register_operand" "")
+	  (match_operand:X87MODEF 3 "register_operand" "")))]
+  "(TARGET_80387 && TARGET_CMOVE)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "*movsfcc_1_387"
+  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+		      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+   (set_attr "mode" "SF,SF,SI,SI")])
+
+(define_insn "*movdfcc_1"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
+	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+		      (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}
+   #
+   #"
+  [(set_attr "type" "fcmov,fcmov,multi,multi")
+   (set_attr "mode" "DF")])
+
+(define_insn "*movdfcc_1_rex64"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
+	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+		      (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+   (set_attr "mode" "DF")])
+
+(define_split
+  [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
+	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+				[(match_operand 4 "flags_reg_operand" "")
+				 (const_int 0)])
+		      (match_operand:DF 2 "nonimmediate_operand" "")
+		      (match_operand:DF 3 "nonimmediate_operand" "")))]
+  "!TARGET_64BIT && reload_completed"
+  [(set (match_dup 2)
+	(if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+		      (match_dup 5)
+		      (match_dup 6)))
+   (set (match_dup 3)
+	(if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+		      (match_dup 7)
+		      (match_dup 8)))]
+  "split_di (&operands[2], 2, &operands[5], &operands[7]);
+   split_di (&operands[0], 1, &operands[2], &operands[3]);")
+
+(define_insn "*movxfcc_1"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+	(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand:XF 2 "register_operand" "f,0")
+		      (match_operand:XF 3 "register_operand" "0,f")))]
+  "TARGET_80387 && TARGET_CMOVE"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov")
+   (set_attr "mode" "XF")])
+
+;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
+;; SSE5 conditional move
+(define_insn "*sse5_pcmov_<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+	(if_then_else:MODEF
+	  (match_operand:MODEF 1 "register_operand" "x,0")
+	  (match_operand:MODEF 2 "register_operand" "0,x")
+	  (match_operand:MODEF 3 "register_operand" "x,x")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+  "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+  [(set_attr "type" "sse4arg")])
+
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
+
+(define_insn "*avx_<code><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(smaxmin:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "<code><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(smaxmin:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "<maxminfprefix>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;;   min = (op1 < op2 ? op1 : op2)
+;;   max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
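+;; These are exactly the hardware semantics of min{ss,sd} and max{ss,sd}:
+;; when the operands compare unordered (a NaN is present) or equal, the
+;; second source operand is returned, so the operand order must be
+;; preserved.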
+
+(define_insn "*avx_ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MIN))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MIN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MAX))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MAX))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
+
+;; Make two stack loads independent:
+;;   fld aa              fld aa
+;;   fld %st(0)     ->   fld bb
+;;   fmul bb             fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+  [(set (match_operand 0 "fp_register_operand" "")
+	(match_operand 1 "fp_register_operand" ""))
+   (set (match_dup 0)
+	(match_operator 2 "binary_fp_operator"
+	   [(match_dup 0)
+	    (match_operand 3 "memory_operand" "")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (match_dup 4))]
+
+  ;; The % modifier is not operational anymore in peephole2s, so we have to
+  ;; swap the operands manually in the case of addition and multiplication.
+  "if (COMMUTATIVE_ARITH_P (operands[2]))
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+				 operands[0], operands[1]);
+   else
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+				 operands[1], operands[0]);")
+
+;; Conditional addition patterns
+(define_expand "add<mode>cc"
+  [(match_operand:SWI 0 "register_operand" "")
+   (match_operand 1 "comparison_operator" "")
+   (match_operand:SWI 2 "register_operand" "")
+   (match_operand:SWI 3 "const_int_operand" "")]
+  ""
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
+
+
+;; Misc patterns (?)
+
+;; This pattern exists to put a dependency on all ebp-based memory accesses.
+;; Otherwise there will be nothing to keep
+;;
+;; [(set (reg ebp) (reg esp))]
+;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
+;;  (clobber (eflags)]
+;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
+;;
+;; in proper program order.
+(define_insn "pro_epilogue_adjust_stack_1"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "0,r")
+	         (match_operand:SI 2 "immediate_operand" "i,i")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOV:
+      return "mov{l}\t{%1, %0|%0, %1}";
+
+    case TYPE_ALU:
+      if (CONST_INT_P (operands[2])
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+	          && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{l}\t{%2, %0|%0, %2}";
+	}
+      return "add{l}\t{%2, %0|%0, %2}";
+
+    case TYPE_LEA:
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{l}\t{%a2, %0|%0, %a2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set (attr "type")
+	(cond [(eq_attr "alternative" "0")
+		 (const_string "alu")
+	       (match_operand:SI 2 "const0_operand" "")
+		 (const_string "imov")
+	      ]
+	      (const_string "lea")))
+   (set_attr "mode" "SI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "0,r")
+		 (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOV:
+      return "mov{q}\t{%1, %0|%0, %1}";
+
+    case TYPE_ALU:
+      if (CONST_INT_P (operands[2])
+	  /* Avoid overflows.  */
+	  && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+          && (INTVAL (operands[2]) == 128
+	      || (INTVAL (operands[2]) < 0
+	          && INTVAL (operands[2]) != -128)))
+	{
+	  operands[2] = GEN_INT (-INTVAL (operands[2]));
+	  return "sub{q}\t{%2, %0|%0, %2}";
+	}
+      return "add{q}\t{%2, %0|%0, %2}";
+
+    case TYPE_LEA:
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{q}\t{%a2, %0|%0, %a2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set (attr "type")
+	(cond [(eq_attr "alternative" "0")
+		 (const_string "alu")
+	       (match_operand:DI 2 "const0_operand" "")
+		 (const_string "imov")
+	      ]
+	      (const_string "lea")))
+   (set_attr "mode" "DI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "0,r")
+		 (match_operand:DI 3 "immediate_operand" "i,i")))
+   (use (match_operand:DI 2 "register_operand" "r,r"))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      return "add{q}\t{%2, %0|%0, %2}";
+
+    case TYPE_LEA:
+      operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]);
+      return "lea{q}\t{%a2, %0|%0, %a2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,lea")
+   (set_attr "mode" "DI")])
+
+(define_insn "allocate_stack_worker_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")]
+			    UNSPECV_STACK_PROBE))
+   (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_STACK_PROBE"
+  "call\t___chkstk"
+  [(set_attr "type" "multi")
+   (set_attr "length" "5")])
+
+(define_insn "allocate_stack_worker_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")]
+			    UNSPECV_STACK_PROBE))
+   (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1)))
+   (clobber (reg:DI R10_REG))
+   (clobber (reg:DI R11_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_STACK_PROBE"
+  "call\t___chkstk"
+  [(set_attr "type" "multi")
+   (set_attr "length" "5")])
+
+(define_expand "allocate_stack"
+  [(match_operand 0 "register_operand" "")
+   (match_operand 1 "general_operand" "")]
+  "TARGET_STACK_PROBE"
+{
+  rtx x;
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT 0
+#endif
+
+  if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
+      && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
+    {
+      x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1],
+			       stack_pointer_rtx, 0, OPTAB_DIRECT);
+      if (x != stack_pointer_rtx)
+	emit_move_insn (stack_pointer_rtx, x);
+    }
+  else
+    {
+      x = copy_to_mode_reg (Pmode, operands[1]);
+      if (TARGET_64BIT)
+	x = gen_allocate_stack_worker_64 (x, x);
+      else
+	x = gen_allocate_stack_worker_32 (x, x);
+      emit_insn (x);
+    }
+
+  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+  DONE;
+})
+
+(define_expand "builtin_setjmp_receiver"
+  [(label_ref (match_operand 0 "" ""))]
+  "!TARGET_64BIT && flag_pic"
+{
+#if TARGET_MACHO
+  if (TARGET_MACHO)
+    {
+      rtx xops[3];
+      rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+      rtx label_rtx = gen_label_rtx ();
+      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+      xops[0] = xops[1] = picreg;
+      xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
+      ix86_expand_binary_operator (MINUS, SImode, xops);
+    }
+  else
+#endif
+    emit_insn (gen_set_got (pic_offset_table_rtx));
+  DONE;
+})
+
+;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(match_operator 3 "promotable_binary_operator"
+	   [(match_operand 1 "register_operand" "")
+	    (match_operand 2 "aligned_operand" "")]))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && ((GET_MODE (operands[0]) == HImode
+	&& ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
+            /* ??? next two lines just !satisfies_constraint_K (...) */
+	    || !CONST_INT_P (operands[2])
+	    || satisfies_constraint_K (operands[2])))
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);
+   if (GET_CODE (operands[3]) != ASHIFT)
+     operands[2] = gen_lowpart (SImode, operands[2]);
+   PUT_MODE (operands[3], SImode);")
+
+; Promote the QImode tests, as i386 has an encoding of the AND
+; instruction with a 32-bit sign-extended immediate, so the
+; instruction size is unchanged, except in the %eax case, for
+; which it is increased by one byte, hence the ! optimize_size.
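+;
+; For instance: "and $5, %bl" (80 e3 05) and the promoted "and $5, %ebx"
+; (83 e3 05) are both 3 bytes, while "and $5, %al" (24 05, 2 bytes)
+; grows to "and $5, %eax" (83 e0 05, 3 bytes).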
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 2 "compare_operator"
+	  [(and (match_operand 3 "aligned_operand" "")
+		(match_operand 4 "const_int_operand" ""))
+	   (const_int 0)]))
+   (set (match_operand 1 "register_operand" "")
+	(and (match_dup 3) (match_dup 4)))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && optimize_insn_for_speed_p ()
+   && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
+       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
+   /* Ensure that the operand will remain a sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
+			            (const_int 0)]))
+	      (set (match_dup 1)
+		   (and:SI (match_dup 3) (match_dup 4)))])]
+{
+  operands[4]
+    = gen_int_mode (INTVAL (operands[4])
+		    & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+; Don't promote the QImode tests, as i386 doesn't have an encoding of
+; the TEST instruction with a 32-bit sign-extended immediate, so the
+; instruction size would at least double, which is not what we
+; want even with ! optimize_size.
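+;
+; For instance: "test $5, %bl" is f6 c3 05 (3 bytes), while the promoted
+; "test $5, %ebx" needs the full imm32 form f7 c3 05 00 00 00 (6 bytes).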
+(define_split
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and (match_operand:HI 2 "aligned_operand" "")
+		(match_operand:HI 3 "const_int_operand" ""))
+	   (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && ! TARGET_FAST_PREFIX
+   && optimize_insn_for_speed_p ()
+   /* Ensure that the operand will remain a sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
+  [(set (match_dup 0)
+	(match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+		         (const_int 0)]))]
+{
+  operands[3]
+    = gen_int_mode (INTVAL (operands[3])
+		    & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(neg (match_operand 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(parallel [(set (match_dup 0)
+		   (neg:SI (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(not (match_operand 1 "register_operand" "")))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(set (match_dup 0)
+	(not:SI (match_dup 1)))]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(if_then_else (match_operator 1 "comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand 2 "register_operand" "")
+		      (match_operand 3 "register_operand" "")))]
+  "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(set (match_dup 0)
+	(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[2] = gen_lowpart (SImode, operands[2]);
+   operands[3] = gen_lowpart (SImode, operands[3]);")
+
+
+;; RTL Peephole optimizations, run before sched2.  These primarily look to
+;; transform a complex memory operation into two memory to register operations.
+
+;; Don't push memory operands
+(define_peephole2
+  [(set (match_operand:SI 0 "push_operand" "")
+	(match_operand:SI 1 "memory_operand" ""))
+   (match_scratch:SI 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "push_operand" "")
+	(match_operand:DI 1 "memory_operand" ""))
+   (match_scratch:DI 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; We need to handle SFmode only, because DFmode and XFmode are split into
+;; SImode pushes.
+(define_peephole2
+  [(set (match_operand:SF 0 "push_operand" "")
+	(match_operand:SF 1 "memory_operand" ""))
+   (match_scratch:SF 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:HI 0 "push_operand" "")
+	(match_operand:HI 1 "memory_operand" ""))
+   (match_scratch:HI 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:QI 0 "push_operand" "")
+	(match_operand:QI 1 "memory_operand" ""))
+   (match_scratch:QI 2 "q")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; Don't move an immediate directly to memory when the instruction
+;; gets too big.
+(define_peephole2
+  [(match_scratch:SI 1 "r")
+   (set (match_operand:SI 0 "memory_operand" "")
+        (const_int 0))]
+  "optimize_insn_for_speed_p ()
+   && ! TARGET_USE_MOV0
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 1) (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 1))]
+  "")
+
+(define_peephole2
+  [(match_scratch:HI 1 "r")
+   (set (match_operand:HI 0 "memory_operand" "")
+        (const_int 0))]
+  "optimize_insn_for_speed_p ()
+   && ! TARGET_USE_MOV0
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 2) (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 1))]
+  "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+  [(match_scratch:QI 1 "q")
+   (set (match_operand:QI 0 "memory_operand" "")
+        (const_int 0))]
+  "optimize_insn_for_speed_p ()
+   && ! TARGET_USE_MOV0
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 2) (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 1))]
+  "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (set (match_operand:SI 0 "memory_operand" "")
+        (match_operand:SI 1 "immediate_operand" ""))]
+  "optimize_insn_for_speed_p ()
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(match_scratch:HI 2 "r")
+   (set (match_operand:HI 0 "memory_operand" "")
+        (match_operand:HI 1 "immediate_operand" ""))]
+  "optimize_insn_for_speed_p ()
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(match_scratch:QI 2 "q")
+   (set (match_operand:QI 0 "memory_operand" "")
+        (match_operand:QI 1 "immediate_operand" ""))]
+  "optimize_insn_for_speed_p ()
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; Don't compare memory with zero; load it into a register and use a test
+;; instead.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+ 	(match_operator 1 "compare_operator"
+	  [(match_operand:SI 2 "memory_operand" "")
+	   (const_int 0)]))
+   (match_scratch:SI 3 "r")]
+  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
+  [(set (match_dup 3) (match_dup 2))
+   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
+  "")
+
+;; NOT is not pairable on Pentium, while XOR is, though it is one byte
+;; longer.  Don't split NOTs with a displacement operand, because the
+;; resulting XOR will not be pairable anyway.
+;;
+;; On AMD K6, NOT is vector decoded with a memory operand that cannot be
+;; represented using a modRM byte.  The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
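+;;
+;; For example: "notl %eax" is f7 d0 (2 bytes), while the replacement
+;; "xorl $-1, %eax" is 83 f0 ff (3 bytes) -- one byte longer, but
+;; pairable on Pentium.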
+
+(define_peephole2
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+        && (!MEM_P (operands[0])
+            || !memory_displacement_operand (operands[0], SImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:SI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+        && (!MEM_P (operands[0])
+            || !memory_displacement_operand (operands[0], HImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:HI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+        && (!MEM_P (operands[0])
+            || !memory_displacement_operand (operands[0], QImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:QI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+;; Non pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies.  The "and" form is also shorter (one
+;; byte opcode instead of two, have a short form for byte operands),
+;; so do it for other CPUs as well.  Given that the value was dead,
+;; this should not create any new dependencies.  Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
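+;;
+;; For example, when %edx dies: "testl $8, %edx" (f7 c2 08 00 00 00,
+;; 6 bytes) becomes "andl $8, %edx" (83 e2 08, 3 bytes), which sets
+;; SF/ZF/PF identically and clears CF/OF just as test does.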
+
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and:SI (match_operand:SI 2 "register_operand" "")
+		   (match_operand:SI 3 "immediate_operand" ""))
+	   (const_int 0)]))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && (true_regnum (operands[2]) != AX_REG
+       || satisfies_constraint_K (operands[3]))
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel
+     [(set (match_dup 0)
+	   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+		            (const_int 0)]))
+      (set (match_dup 2)
+	   (and:SI (match_dup 2) (match_dup 3)))])]
+  "")
+
+;; We don't need to handle the HImode case, because it will be promoted to
+;; SImode when ! TARGET_PARTIAL_REG_STALL.
+
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and:QI (match_operand:QI 2 "register_operand" "")
+		   (match_operand:QI 3 "immediate_operand" ""))
+	   (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL
+   && ix86_match_ccmode (insn, CCNOmode)
+   && true_regnum (operands[2]) != AX_REG
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel
+     [(set (match_dup 0)
+	   (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+		            (const_int 0)]))
+      (set (match_dup 2)
+	   (and:QI (match_dup 2) (match_dup 3)))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and:SI
+	     (zero_extract:SI
+	       (match_operand 2 "ext_register_operand" "")
+	       (const_int 8)
+	       (const_int 8))
+	     (match_operand 3 "const_int_operand" ""))
+	   (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL
+   && ix86_match_ccmode (insn, CCNOmode)
+   && true_regnum (operands[2]) != AX_REG
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 1
+		     [(and:SI
+			(zero_extract:SI
+			  (match_dup 2)
+			  (const_int 8)
+			  (const_int 8))
+			(match_dup 3))
+		      (const_int 0)]))
+	      (set (zero_extract:SI (match_dup 2)
+				    (const_int 8)
+				    (const_int 8))
+		   (and:SI
+		     (zero_extract:SI
+		       (match_dup 2)
+		       (const_int 8)
+		       (const_int 8))
+		     (match_dup 3)))])]
+  "")
+
+;; Don't do logical operations with memory inputs.
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 1 "memory_operand" "")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_operand:SI 1 "memory_operand" "")
+                      (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+;; Prefer Load+RegOp to Mov+MemOp.  Watch out for cases when the memory address
+;; refers to the destination of the load!
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (match_dup 0)
+                   (match_operator:SI 3 "commutative_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 2 "memory_operand" "")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "REGNO (operands[0]) != REGNO (operands[1])
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[1]))"
+  [(set (match_dup 0) (match_dup 4))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);")
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand" "")
+        (match_operand 1 "register_operand" ""))
+   (set (match_dup 0)
+                   (match_operator 3 "commutative_operator"
+                     [(match_dup 0)
+                      (match_operand 2 "memory_operand" "")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])
+   && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1])) 
+       || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))"
+  [(set (match_dup 0) (match_dup 2))
+   (set (match_dup 0)
+        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]
+  "")
+
+; Don't do logical operations with memory outputs.
+;
+; These two don't make sense for PPro/PII -- we're expanding a 4-uop
+; instruction into two 1-uop insns plus a 2-uop insn.  That last has
+; the same decoder scheduling characteristics as the original.
+
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand" "")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 1 "nonmemory_operand" "")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+  [(set (match_dup 2) (match_dup 0))
+   (parallel [(set (match_dup 2)
+                   (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand" "")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_operand:SI 1 "nonmemory_operand" "")
+                      (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+  [(set (match_dup 2) (match_dup 0))
+   (parallel [(set (match_dup 2)
+                   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; Attempt to always use XOR for zeroing registers.
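+;; For example: "movl $0, %eax" is b8 00 00 00 00 (5 bytes), while
+;; "xorl %eax, %eax" is 31 c0 (2 bytes); the xor clobbers the flags,
+;; hence the check that FLAGS_REG is dead.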
+(define_peephole2
+  [(set (match_operand 0 "register_operand" "")
+	(match_operand 1 "const0_operand" ""))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+   && GENERAL_REG_P (operands[0])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[0] = gen_lowpart (word_mode, operands[0]);
+})
+
+(define_peephole2
+  [(set (strict_low_part (match_operand 0 "register_operand" ""))
+	(const_int 0))]
+  "(GET_MODE (operands[0]) == QImode
+    || GET_MODE (operands[0]) == HImode)
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg.
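+;; For example: "movl $-1, %eax" is b8 ff ff ff ff (5 bytes), while
+;; "orl $-1, %eax" is 83 c8 ff (3 bytes).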
+(define_peephole2
+  [(set (match_operand 0 "register_operand" "")
+	(const_int -1))]
+  "(GET_MODE (operands[0]) == HImode
+    || GET_MODE (operands[0]) == SImode
+    || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
+   && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int -1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
+			      operands[0]);")
+
+;; Attempt to convert simple leas to adds. These can be created by
+;; move expanders.
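+;;
+;; For example: "leal 4(%eax), %eax" becomes "addl $4, %eax", which is
+;; no longer and can be faster on some CPUs, at the cost of clobbering
+;; the flags (hence the check that FLAGS_REG is dead).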
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+  	(plus:SI (match_dup 0)
+		 (match_operand:SI 1 "nonmemory_operand" "")))]
+  "peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+  	(subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+			    (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+  "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+  [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+  	(plus:DI (match_dup 0)
+		 (match_operand:DI 1 "x86_64_general_operand" "")))]
+  "peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+  	(mult:SI (match_dup 0)
+		 (match_operand:SI 1 "const_int_operand" "")))]
+  "exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+  	(mult:DI (match_dup 0)
+		 (match_operand:DI 1 "const_int_operand" "")))]
+  "exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+  	(subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+		   (match_operand:DI 2 "const_int_operand" "")) 0))]
+  "exact_log2 (INTVAL (operands[2])) >= 0
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since push is only 1 byte, while add imm, %esp
+;; is 3 bytes.  On many CPUs it is also faster, since special hardware to
+;; avoid esp dependencies is present.
+
+;; While some of these conversions may be done using splitters, we use
+;; peepholes in order to allow the combine_stack_adjustments pass to see
+;; nonobfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already-live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the site of esp adjustments it is
+;; very likely that call-clobbered registers are dead.  We may want to use
+;; the base pointer as an alternative when no register is available later.
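+;;
+;; As an illustration: "subl $4, %esp" (83 ec 04, 3 bytes) becomes
+;; "pushl %ecx" (1 byte), where %ecx is any register whose current value
+;; is dead.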
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (mem:BLK (scratch)))])]
+  "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (match_scratch:SI 1 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (mem:BLK (scratch)))])
+   (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p ()"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (mem:BLK (scratch)))])
+   (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+  "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+  "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (match_scratch:SI 1 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+   (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p ()"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+   (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+  "")
+
+;; Convert compares with 1 to shorter inc/dec operations when CF is not
+;; required and the register dies.  Similarly for 128 to -128.
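+;;
+;; For example, when %eax dies: "cmpl $1, %eax" (83 f8 01, 3 bytes) can
+;; become "decl %eax" (1 byte on IA-32), and "cmpl $128, %eax" (5 bytes,
+;; since 128 does not fit a sign-extended imm8) can become
+;; "subl $-128, %eax" (83 e8 80, 3 bytes); both produce the same non-CF
+;; flags, which is all CCGCmode requires.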
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(match_operand 2 "register_operand" "")
+	   (match_operand 3 "const_int_operand" "")]))]
+  "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_size)
+     && incdec_operand (operands[3], GET_MODE (operands[3])))
+    || (!TARGET_FUSE_CMP_AND_BRANCH
+	&& INTVAL (operands[3]) == 128))
+   && ix86_match_ccmode (insn, CCGCmode)
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel [(set (match_dup 0)
+		   (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
+	      (clobber (match_dup 2))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+   (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+   (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+	      (clobber (mem:BLK (scratch)))])]
+  "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (match_scratch:DI 1 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+	      (clobber (mem:BLK (scratch)))])
+   (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p ()"
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+	      (clobber (mem:BLK (scratch)))])
+   (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+  "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+  "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (match_scratch:DI 1 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+   (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+  "")
+
+(define_peephole2
+  [(match_scratch:DI 0 "r")
+   (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p ()"
+  [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+   (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+	      (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+  "")
+
+;; Convert imul by three, five and nine into lea
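+;; For example: "imull $9, %eax, %eax" becomes "leal (%eax,%eax,8), %eax";
+;; the pattern below rewrites the constant to 8 and matches reg + reg*8.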
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (mult:SI (match_operand:SI 1 "register_operand" "")
+		   (match_operand:SI 2 "const_int_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[2]) == 3
+   || INTVAL (operands[2]) == 5
+   || INTVAL (operands[2]) == 9"
+  [(set (match_dup 0)
+        (plus:SI (mult:SI (match_dup 1) (match_dup 2))
+                 (match_dup 1)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+          (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                   (match_operand:SI 2 "const_int_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p ()
+   && (INTVAL (operands[2]) == 3
+       || INTVAL (operands[2]) == 5
+       || INTVAL (operands[2]) == 9)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0)
+        (plus:SI (mult:SI (match_dup 0) (match_dup 2))
+                 (match_dup 0)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+	  (mult:DI (match_operand:DI 1 "register_operand" "")
+		   (match_operand:DI 2 "const_int_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT
+   && (INTVAL (operands[2]) == 3
+       || INTVAL (operands[2]) == 5
+       || INTVAL (operands[2]) == 9)"
+  [(set (match_dup 0)
+        (plus:DI (mult:DI (match_dup 1) (match_dup 2))
+                 (match_dup 1)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+          (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+                   (match_operand:DI 2 "const_int_operand" "")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT
+   && optimize_insn_for_speed_p ()
+   && (INTVAL (operands[2]) == 3
+       || INTVAL (operands[2]) == 5
+       || INTVAL (operands[2]) == 9)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0)
+        (plus:DI (mult:DI (match_dup 0) (match_dup 2))
+                 (match_dup 0)))]
+  { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+;; Imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is direct decoded.
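+;;
+;; For example (an illustrative sketch): "imull $100000, 4(%esp), %eax"
+;; becomes "movl 4(%esp), %ecx" followed by "imull $100000, %ecx, %eax",
+;; assuming %ecx is available as a scratch register.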
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (match_operand:DI 1 "memory_operand" "")
+			    (match_operand:DI 2 "immediate_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (mult:SI (match_operand:SI 1 "memory_operand" "")
+			    (match_operand:SI 2 "immediate_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (zero_extend:DI
+		     (mult:SI (match_operand:SI 1 "memory_operand" "")
+			      (match_operand:SI 2 "immediate_operand" ""))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])]
+"")
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg.
+;; It would be better to force the assembler to encode the instruction using
+;; the long immediate form, but there is apparently no way to do so.
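+;;
+;; For example (an illustrative sketch): "imull $11, %eax" becomes
+;; "movl $11, %ecx" followed by "imull %ecx, %eax", assuming %ecx is
+;; available as a scratch register.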
+(define_peephole2
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
+		   (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+			    (match_operand:DI 2 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_scratch:DI 3 "r")]
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+   && satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 2))
+   (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+		   (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			    (match_operand:SI 2 "const_int_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_scratch:SI 3 "r")]
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+   && satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 2))
+   (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+  [(parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (mult:HI (match_operand:HI 1 "nonimmediate_operand" "")
+			    (match_operand:HI 2 "immediate_operand" "")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (match_scratch:HI 3 "r")]
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()"
+  [(set (match_dup 3) (match_dup 2))
+   (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands might end up in the form:
+;;  sall    $2, %eax
+;;  movl    4(%esp), %edx
+;;  addl    %edx, %eax
+;; instead of pre-splitting:
+;;  sall    $2, %eax
+;;  addl    4(%esp), %eax
+;; Turn it into:
+;;  movl    4(%esp), %edx
+;;  leal    (%edx,%eax,4), %eax
+
+(define_peephole2
+  [(parallel [(set (match_operand 0 "register_operand" "")
+		   (ashift (match_operand 1 "register_operand" "")
+			   (match_operand 2 "const_int_operand" "")))
+	       (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand 3 "register_operand")
+        (match_operand 4 "x86_64_general_operand" ""))
+   (parallel [(set (match_operand 5 "register_operand" "")
+		   (plus (match_operand 6 "register_operand" "")
+			 (match_operand 7 "register_operand" "")))
+		   (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
+   /* Validate MODE for lea.  */
+   && ((!TARGET_PARTIAL_REG_STALL
+	&& (GET_MODE (operands[0]) == QImode
+	    || GET_MODE (operands[0]) == HImode))
+       || GET_MODE (operands[0]) == SImode
+       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+   /* We reorder the load and the shift.  */
+   && !rtx_equal_p (operands[1], operands[3])
+   && !reg_overlap_mentioned_p (operands[0], operands[4])
+   /* The last PLUS must consist of operands 0 and 3.  */
+   && !rtx_equal_p (operands[0], operands[3])
+   && (rtx_equal_p (operands[3], operands[6])
+       || rtx_equal_p (operands[3], operands[7]))
+   && (rtx_equal_p (operands[0], operands[6])
+       || rtx_equal_p (operands[0], operands[7]))
+   /* The intermediate operand 0 must die or be the same as the output.  */
+   && (rtx_equal_p (operands[0], operands[5])
+       || peep2_reg_dead_p (3, operands[0]))"
+  [(set (match_dup 3) (match_dup 4))
+   (set (match_dup 0) (match_dup 1))]
+{
+  enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+  int scale = 1 << INTVAL (operands[2]);
+  rtx index = gen_lowpart (Pmode, operands[1]);
+  rtx base = gen_lowpart (Pmode, operands[3]);
+  rtx dest = gen_lowpart (mode, operands[5]);
+
+  operands[1] = gen_rtx_PLUS (Pmode, base,
+  			      gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+  if (mode != Pmode)
+    operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+  operands[0] = dest;
+})
+
+;; Call-value patterns come last so that the wildcard operand does not
+;; disrupt insn-recog's switch tables.
+
+(define_insn "*call_value_pop_0"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+	      (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 3 "immediate_operand" "")))]
+  "!TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P1";
+  else
+    return "call\t%P1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_1"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+	      (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+			    (match_operand:SI 3 "immediate_operand" "i")))]
+  "!TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    {
+      if (SIBLING_CALL_P (insn))
+	return "jmp\t%P1";
+      else
+	return "call\t%P1";
+    }
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%A1";
+  else
+    return "call\t%A1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+	      (match_operand:SI 2 "" "")))]
+  "!TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P1";
+  else
+    return "call\t%P1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+	      (match_operand:DI 2 "const_int_operand" "")))]
+  "TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P1";
+  else
+    return "call\t%P1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64_ms_sysv"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+	      (match_operand:DI 2 "const_int_operand" "")))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:TI XMM6_REG))
+   (clobber (reg:TI XMM7_REG))
+   (clobber (reg:TI XMM8_REG))
+   (clobber (reg:TI XMM9_REG))
+   (clobber (reg:TI XMM10_REG))
+   (clobber (reg:TI XMM11_REG))
+   (clobber (reg:TI XMM12_REG))
+   (clobber (reg:TI XMM13_REG))
+   (clobber (reg:TI XMM14_REG))
+   (clobber (reg:TI XMM15_REG))
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P1";
+  else
+    return "call\t%P1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+	      (match_operand:SI 2 "" "")))]
+  "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    return "call\t%P1";
+  return "call\t%A1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a"))
+	      (match_operand:SI 2 "" "")))]
+  "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    return "jmp\t%P1";
+  return "jmp\t%A1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	      (match_operand:DI 2 "" "")))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    return "call\t%P1";
+  return "call\t%A1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_ms_sysv"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:TI XMM6_REG))
+   (clobber (reg:TI XMM7_REG))
+   (clobber (reg:TI XMM8_REG))
+   (clobber (reg:TI XMM9_REG))
+   (clobber (reg:TI XMM10_REG))
+   (clobber (reg:TI XMM11_REG))
+   (clobber (reg:TI XMM12_REG))
+   (clobber (reg:TI XMM13_REG))
+   (clobber (reg:TI XMM14_REG))
+   (clobber (reg:TI XMM15_REG))
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    return "call\t%P1";
+  return "call\t%A1";
+}
+  [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_large"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+	      (match_operand:DI 2 "" "")))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "call\t%A1"
+  [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+	      (match_operand:DI 2 "" "")))]
+  "SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "jmp\t%P1"
+  [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64_v"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (reg:DI R11_REG))
+	      (match_operand:DI 1 "" "")))]
+  "SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "jmp\t{*%%}r11"
+  [(set_attr "type" "callv")])
+
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like.  Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; Keeping with tradition, "6" is in honor of #UD.
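+;; The two bytes emitted below, 0f 0b (the 16-bit word 0x0b0f stored
+;; little-endian), are the "ud2" undefined-instruction opcode.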
+(define_insn "trap"
+  [(trap_if (const_int 1) (const_int 6))]
+  ""
+  { return ASM_SHORT "0x0b0f"; }
+  [(set_attr "length" "2")])
+
+(define_expand "sse_prologue_save"
+  [(parallel [(set (match_operand:BLK 0 "" "")
+		   (unspec:BLK [(reg:DI 21)
+				(reg:DI 22)
+				(reg:DI 23)
+				(reg:DI 24)
+				(reg:DI 25)
+				(reg:DI 26)
+				(reg:DI 27)
+				(reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+	      (use (match_operand:DI 1 "register_operand" ""))
+	      (use (match_operand:DI 2 "immediate_operand" ""))
+	      (use (label_ref:DI (match_operand 3 "" "")))])]
+  "TARGET_64BIT"
+  "")
+
+(define_insn "*sse_prologue_save_insn"
+  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+			  (match_operand:DI 4 "const_int_operand" "n")))
+	(unspec:BLK [(reg:DI 21)
+		     (reg:DI 22)
+		     (reg:DI 23)
+		     (reg:DI 24)
+		     (reg:DI 25)
+		     (reg:DI 26)
+		     (reg:DI 27)
+		     (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+   (use (match_operand:DI 1 "register_operand" "r"))
+   (use (match_operand:DI 2 "const_int_operand" "i"))
+   (use (label_ref:DI (match_operand 3 "" "X")))]
+  "TARGET_64BIT
+   && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
+   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
+{
+  int i;
+  operands[0] = gen_rtx_MEM (Pmode,
+			     gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+  /* VEX instruction with a REX prefix will #UD.  */
+  if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
+    gcc_unreachable ();
+
+  output_asm_insn ("jmp\t%A1", operands);
+  for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+    {
+      operands[4] = adjust_address (operands[0], DImode, i*16);
+      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
+      PUT_MODE (operands[4], TImode);
+      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
+        output_asm_insn ("rex", operands);
+      output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
+    }
+  (*targetm.asm_out.internal_label) (asm_out_file, "L",
+				     CODE_LABEL_NUMBER (operands[3]));
+  return "";
+}
+  [(set_attr "type" "other")
+   (set_attr "length_immediate" "0")
+   (set_attr "length_address" "0")
+   (set (attr "length")
+     (if_then_else
+       (eq (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "34")
+       (const_string "42")))
+   (set_attr "memory" "store")
+   (set_attr "modrm" "0")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DI")])
+
+(define_expand "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "")
+	     (match_operand:SI 1 "const_int_operand" "")
+	     (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
+
+  gcc_assert (rw == 0 || rw == 1);
+  gcc_assert (locality >= 0 && locality <= 3);
+  gcc_assert (GET_MODE (operands[0]) == Pmode
+	      || GET_MODE (operands[0]) == VOIDmode);
+
+  /* Use the 3dNOW prefetch when we are asking for a write prefetch that
+     the SSE counterpart does not support, or when the SSE prefetch is
+     not available at all (K6 machines).  Otherwise use the SSE prefetch,
+     since it allows the locality to be specified.  */
+  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+    operands[2] = GEN_INT (3);
+  else
+    operands[1] = const0_rtx;
+})
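+
+;; Mapping sketch (illustrative, not part of the port):
+;;
+;;   __builtin_prefetch (p, 0, 0);  /* read, no locality   */ -> prefetchnta
+;;   __builtin_prefetch (p, 0, 3);  /* read, high locality */ -> prefetcht0
+;;   __builtin_prefetch (p, 1, 3);  /* write               */ -> prefetchw
+;;                                     (3dNOW, when SSE cannot prefetch
+;;                                      for write)
+;;
+;; matching the rewriting above: the SSE patterns require the write flag
+;; (operand 1) to be zero, while the 3dNOW patterns require the locality
+;; (operand 2) to be three.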
+
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (const_int 0)
+	     (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
+{
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
+
+  int locality = INTVAL (operands[1]);
+  gcc_assert (locality >= 0 && locality <= 3);
+
+  return patterns[locality];
+}
+  [(set_attr "type" "sse")
+   (set_attr "memory" "none")])
+
+(define_insn "*prefetch_sse_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+	     (const_int 0)
+	     (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && TARGET_64BIT"
+{
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
+
+  int locality = INTVAL (operands[1]);
+  gcc_assert (locality >= 0 && locality <= 3);
+
+  return patterns[locality];
+}
+  [(set_attr "type" "sse")
+   (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (const_int 3))]
+  "TARGET_3DNOW && !TARGET_64BIT"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (const_int 3))]
+  "TARGET_3DNOW && TARGET_64BIT"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
+
+(define_expand "stack_protect_set"
+  [(match_operand 0 "memory_operand" "")
+   (match_operand 1 "memory_operand" "")]
+  ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_tls_protect_set_di (operands[0],
+					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  else
+    emit_insn (gen_stack_tls_protect_set_si (operands[0],
+					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_protect_set_di (operands[0], operands[1]));
+  else
+    emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+#endif
+  DONE;
+})
+
+(define_insn "stack_protect_set_si"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+	(unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+   (set (match_scratch:SI 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_set_di"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+   (set (match_scratch:DI 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_si"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+	(unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+   (set (match_scratch:SI 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_di"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+   (set (match_scratch:DI 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  {
+     /* The kernel uses a different segment register for performance
+        reasons: this way a system call does not have to trash the
+        userspace segment register, which would be expensive.  */
+     if (ix86_cmodel != CM_KERNEL)
+        return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+     else
+        return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+  }
+  [(set_attr "type" "multi")])
+
+(define_expand "stack_protect_test"
+  [(match_operand 0 "memory_operand" "")
+   (match_operand 1 "memory_operand" "")
+   (match_operand 2 "" "")]
+  ""
+{
+  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  ix86_compare_emitted = flags;
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_tls_protect_test_di (flags, operands[0],
+					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  else
+    emit_insn (gen_stack_tls_protect_test_si (flags, operands[0],
+					GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1]));
+  else
+    emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1]));
+#endif
+  emit_jump_insn (gen_beq (operands[2]));
+  DONE;
+})
+
+(define_insn "stack_protect_test_si"
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+	(unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+		     (match_operand:SI 2 "memory_operand" "m")]
+		    UNSPEC_SP_TEST))
+   (clobber (match_scratch:SI 3 "=&r"))]
+  ""
+  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_test_di"
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+	(unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+		     (match_operand:DI 2 "memory_operand" "m")]
+		    UNSPEC_SP_TEST))
+   (clobber (match_scratch:DI 3 "=&r"))]
+  "TARGET_64BIT"
+  "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_si"
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+	(unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+		     (match_operand:SI 2 "const_int_operand" "i")]
+		    UNSPEC_SP_TLS_TEST))
+   (clobber (match_scratch:SI 3 "=r"))]
+  ""
+  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}"
+  [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_di"
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+	(unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+		     (match_operand:DI 2 "const_int_operand" "i")]
+		    UNSPEC_SP_TLS_TEST))
+   (clobber (match_scratch:DI 3 "=r"))]
+  "TARGET_64BIT"
+  {
+     /* The kernel uses a different segment register for performance
+        reasons: this way a system call does not have to trash the
+        userspace segment register, which would be expensive.  */
+     if (ix86_cmodel != CM_KERNEL)
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}";
+     else
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}";
+  }
+  [(set_attr "type" "multi")])
+
+(define_mode_iterator CRC32MODE [QI HI SI])
+(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
+(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
+
+(define_insn "sse4_2_crc32<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "register_operand" "0")
+	   (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
+	  UNSPEC_CRC32))]
+  "TARGET_SSE4_2"
+  "crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "SI")])
+
+(define_insn "sse4_2_crc32di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI
+	  [(match_operand:DI 1 "register_operand" "0")
+	   (match_operand:DI 2 "nonimmediate_operand" "rm")]
+	  UNSPEC_CRC32))]
+  "TARGET_SSE4_2 && TARGET_64BIT"
+  "crc32q\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "DI")])
+
+(include "mmx.md")
+(include "sse.md")
+(include "sync.md")