view gcc/config/i386/i386.md @ 146:351920fa3827

merge
author anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Sun, 01 Mar 2020 16:13:28 +0900
parents d34655255c78 1830386684a0
children
line wrap: on
line source

;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988-2020 Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.  */
;;
;; The original PO technology requires these to be ordered by speed,
;; so that assigner will pick the fastest.
;;
;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
;;
;; The special asm out single letter directives following a '%' are:
;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
;; C -- print opcode suffix for set/cmov insn.
;; c -- like C, but print reversed condition
;; F,f -- likewise, but for floating-point.
;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
;;      otherwise nothing
;; R -- print the prefix for register names.
;; z -- print the opcode suffix for the size of the current operand.
;; Z -- likewise, with special suffixes for x87 instructions.
;; * -- print a star (in certain assembler syntax)
;; A -- print an absolute memory reference.
;; E -- print address with DImode register names if TARGET_64BIT.
;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
;; s -- print a shift double count, followed by the assemblers argument
;;	delimiter.
;; b -- print the QImode name of the register for the indicated operand.
;;	%b0 would print %al if operands[0] is reg 0.
;; w --  likewise, print the HImode name of the register.
;; k --  likewise, print the SImode name of the register.
;; q --  likewise, print the DImode name of the register.
;; x --  likewise, print the V4SFmode name of the register.
;; t --  likewise, print the V8SFmode name of the register.
;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
;; y -- print "st(0)" instead of "st" as a register.
;; d -- print duplicated register operand for AVX instruction.
;; D -- print condition for SSE cmp instruction.
;; P -- if PIC, print an @PLT suffix.
;; p -- print raw symbol name.
;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts
;; K -- print HLE lock prefix
;; Y -- print condition for XOP pcom* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.

(define_c_enum "unspec" [
  ;; Relocation specifiers
  UNSPEC_GOT
  UNSPEC_GOTOFF
  UNSPEC_GOTPCREL
  UNSPEC_GOTTPOFF
  UNSPEC_TPOFF
  UNSPEC_NTPOFF
  UNSPEC_DTPOFF
  UNSPEC_GOTNTPOFF
  UNSPEC_INDNTPOFF
  UNSPEC_PLTOFF
  UNSPEC_MACHOPIC_OFFSET
  UNSPEC_PCREL
  UNSPEC_SIZEOF

  ;; Prologue support
  UNSPEC_STACK_ALLOC
  UNSPEC_SET_GOT
  UNSPEC_SET_RIP
  UNSPEC_SET_GOT_OFFSET
  UNSPEC_MEMORY_BLOCKAGE
  UNSPEC_PROBE_STACK

  ;; TLS support
  UNSPEC_TP
  UNSPEC_TLS_GD
  UNSPEC_TLS_LD_BASE
  UNSPEC_TLSDESC
  UNSPEC_TLS_IE_SUN

  ;; Other random patterns
  UNSPEC_SCAS
  UNSPEC_FNSTSW
  UNSPEC_SAHF
  UNSPEC_NOTRAP
  UNSPEC_PARITY
  UNSPEC_FSTCW
  UNSPEC_REP
  UNSPEC_LD_MPIC	; load_macho_picbase
  UNSPEC_TRUNC_NOOP
  UNSPEC_DIV_ALREADY_SPLIT
  UNSPEC_PAUSE
  UNSPEC_LEA_ADDR
  UNSPEC_XBEGIN_ABORT
  UNSPEC_STOS
  UNSPEC_PEEPSIB
  UNSPEC_INSN_FALSE_DEP
  UNSPEC_SBB

  ;; For SSE/MMX support:
  UNSPEC_FIX_NOTRUNC
  UNSPEC_MASKMOV
  UNSPEC_MOVMSK
  UNSPEC_RCP
  UNSPEC_RSQRT
  UNSPEC_PSADBW

  ;; Generic math support
  UNSPEC_COPYSIGN
  UNSPEC_XORSIGN
  UNSPEC_IEEE_MIN	; not commutative
  UNSPEC_IEEE_MAX	; not commutative

  ;; x87 Floating point
  UNSPEC_SIN
  UNSPEC_COS
  UNSPEC_FPATAN
  UNSPEC_FYL2X
  UNSPEC_FYL2XP1
  UNSPEC_FRNDINT
  UNSPEC_FIST
  UNSPEC_F2XM1
  UNSPEC_TAN
  UNSPEC_FXAM

  ;; x87 Rounding
  UNSPEC_FRNDINT_ROUNDEVEN
  UNSPEC_FRNDINT_FLOOR
  UNSPEC_FRNDINT_CEIL
  UNSPEC_FRNDINT_TRUNC
  UNSPEC_FIST_FLOOR
  UNSPEC_FIST_CEIL

  ;; x87 Double output FP
  UNSPEC_SINCOS_COS
  UNSPEC_SINCOS_SIN
  UNSPEC_XTRACT_FRACT
  UNSPEC_XTRACT_EXP
  UNSPEC_FSCALE_FRACT
  UNSPEC_FSCALE_EXP
  UNSPEC_FPREM_F
  UNSPEC_FPREM_U
  UNSPEC_FPREM1_F
  UNSPEC_FPREM1_U

  UNSPEC_C2_FLAG
  UNSPEC_FXAM_MEM

  ;; SSP patterns
  UNSPEC_SP_SET
  UNSPEC_SP_TEST

  ;; For ROUND support
  UNSPEC_ROUND

  ;; For CRC32 support
  UNSPEC_CRC32

  ;; For LZCNT suppoprt
  UNSPEC_LZCNT

  ;; For BMI support
  UNSPEC_TZCNT
  UNSPEC_BEXTR

  ;; For BMI2 support
  UNSPEC_PDEP
  UNSPEC_PEXT

  ;; IRET support
  UNSPEC_INTERRUPT_RETURN
])

(define_c_enum "unspecv" [
  UNSPECV_UD2
  UNSPECV_BLOCKAGE
  UNSPECV_STACK_PROBE
  UNSPECV_PROBE_STACK_RANGE
  UNSPECV_ALIGN
  UNSPECV_PROLOGUE_USE
  UNSPECV_SPLIT_STACK_RETURN
  UNSPECV_CLD
  UNSPECV_NOPS
  UNSPECV_RDTSC
  UNSPECV_RDTSCP
  UNSPECV_RDPMC
  UNSPECV_LLWP_INTRINSIC
  UNSPECV_SLWP_INTRINSIC
  UNSPECV_LWPVAL_INTRINSIC
  UNSPECV_LWPINS_INTRINSIC
  UNSPECV_RDFSBASE
  UNSPECV_RDGSBASE
  UNSPECV_WRFSBASE
  UNSPECV_WRGSBASE
  UNSPECV_FXSAVE
  UNSPECV_FXRSTOR
  UNSPECV_FXSAVE64
  UNSPECV_FXRSTOR64
  UNSPECV_XSAVE
  UNSPECV_XRSTOR
  UNSPECV_XSAVE64
  UNSPECV_XRSTOR64
  UNSPECV_XSAVEOPT
  UNSPECV_XSAVEOPT64
  UNSPECV_XSAVES
  UNSPECV_XRSTORS
  UNSPECV_XSAVES64
  UNSPECV_XRSTORS64
  UNSPECV_XSAVEC
  UNSPECV_XSAVEC64
  UNSPECV_XGETBV
  UNSPECV_XSETBV
  UNSPECV_WBINVD
  UNSPECV_WBNOINVD

  ;; For atomic compound assignments.
  UNSPECV_FNSTENV
  UNSPECV_FLDENV
  UNSPECV_FNSTSW
  UNSPECV_FNCLEX

  ;; For RDRAND support
  UNSPECV_RDRAND

  ;; For RDSEED support
  UNSPECV_RDSEED

  ;; For RTM support
  UNSPECV_XBEGIN
  UNSPECV_XEND
  UNSPECV_XABORT
  UNSPECV_XTEST

  UNSPECV_NLGR

  ;; For CLWB support
  UNSPECV_CLWB

  ;; For CLFLUSHOPT support
  UNSPECV_CLFLUSHOPT

  ;; For MONITORX and MWAITX support 
  UNSPECV_MONITORX
  UNSPECV_MWAITX

  ;; For CLZERO support
  UNSPECV_CLZERO

  ;; For RDPKRU and WRPKRU support
  UNSPECV_PKU

  ;; For RDPID support
  UNSPECV_RDPID

  ;; For CET support
  UNSPECV_NOP_ENDBR
  UNSPECV_NOP_RDSSP
  UNSPECV_INCSSP
  UNSPECV_SAVEPREVSSP
  UNSPECV_RSTORSSP
  UNSPECV_WRSS
  UNSPECV_WRUSS
  UNSPECV_SETSSBSY
  UNSPECV_CLRSSBSY

  ;; For MOVDIRI and MOVDIR64B support
  UNSPECV_MOVDIRI
  UNSPECV_MOVDIR64B

  ;; For WAITPKG support
  UNSPECV_UMWAIT
  UNSPECV_UMONITOR
  UNSPECV_TPAUSE

  ;; For CLDEMOTE support
  UNSPECV_CLDEMOTE

  ;; For Speculation Barrier support
  UNSPECV_SPECULATION_BARRIER

  UNSPECV_PTWRITE

  ;; For ENQCMD and ENQCMDS support
  UNSPECV_ENQCMD
  UNSPECV_ENQCMDS
])

;; Constants to represent rounding modes in the ROUND instruction
(define_constants
  [(ROUND_ROUNDEVEN		0x0)
   (ROUND_FLOOR			0x1)
   (ROUND_CEIL			0x2)
   (ROUND_TRUNC			0x3)
   (ROUND_MXCSR			0x4)
   (ROUND_NO_EXC		0x8)
  ])

;; Constants to represent AVX512F embeded rounding
(define_constants
  [(ROUND_NEAREST_INT			0)
   (ROUND_NEG_INF			1)
   (ROUND_POS_INF			2)
   (ROUND_ZERO				3)
   (NO_ROUND				4)
   (ROUND_SAE				8)
  ])

;; Constants to represent pcomtrue/pcomfalse variants
(define_constants
  [(PCOM_FALSE			0)
   (PCOM_TRUE			1)
   (COM_FALSE_S			2)
   (COM_FALSE_P			3)
   (COM_TRUE_S			4)
   (COM_TRUE_P			5)
  ])

;; Constants used in the XOP pperm instruction
(define_constants
  [(PPERM_SRC			0x00)	/* copy source */
   (PPERM_INVERT		0x20)	/* invert source */
   (PPERM_REVERSE		0x40)	/* bit reverse source */
   (PPERM_REV_INV		0x60)	/* bit reverse & invert src */
   (PPERM_ZERO			0x80)	/* all 0's */
   (PPERM_ONES			0xa0)	/* all 1's */
   (PPERM_SIGN			0xc0)	/* propagate sign bit */
   (PPERM_INV_SIGN		0xe0)	/* invert & propagate sign */
   (PPERM_SRC1			0x00)	/* use first source byte */
   (PPERM_SRC2			0x10)	/* use second source byte */
   ])

;; Registers by name.
(define_constants
  [(AX_REG			 0)
   (DX_REG			 1)
   (CX_REG			 2)
   (BX_REG			 3)
   (SI_REG			 4)
   (DI_REG			 5)
   (BP_REG			 6)
   (SP_REG			 7)
   (ST0_REG			 8)
   (ST1_REG			 9)
   (ST2_REG			10)
   (ST3_REG			11)
   (ST4_REG			12)
   (ST5_REG			13)
   (ST6_REG			14)
   (ST7_REG			15)
   (ARGP_REG			16)
   (FLAGS_REG			17)
   (FPSR_REG			18)
   (FRAME_REG			19)
   (XMM0_REG			20)
   (XMM1_REG			21)
   (XMM2_REG			22)
   (XMM3_REG			23)
   (XMM4_REG			24)
   (XMM5_REG			25)
   (XMM6_REG			26)
   (XMM7_REG			27)
   (MM0_REG			28)
   (MM1_REG			29)
   (MM2_REG			30)
   (MM3_REG			31)
   (MM4_REG			32)
   (MM5_REG			33)
   (MM6_REG			34)
   (MM7_REG			35)
   (R8_REG			36)
   (R9_REG			37)
   (R10_REG			38)
   (R11_REG			39)
   (R12_REG			40)
   (R13_REG			41)
   (R14_REG			42)
   (R15_REG			43)
   (XMM8_REG			44)
   (XMM9_REG			45)
   (XMM10_REG			46)
   (XMM11_REG			47)
   (XMM12_REG			48)
   (XMM13_REG			49)
   (XMM14_REG			50)
   (XMM15_REG			51)
   (XMM16_REG			52)
   (XMM17_REG			53)
   (XMM18_REG			54)
   (XMM19_REG			55)
   (XMM20_REG			56)
   (XMM21_REG			57)
   (XMM22_REG			58)
   (XMM23_REG			59)
   (XMM24_REG			60)
   (XMM25_REG			61)
   (XMM26_REG			62)
   (XMM27_REG			63)
   (XMM28_REG			64)
   (XMM29_REG			65)
   (XMM30_REG			66)
   (XMM31_REG			67)
   (MASK0_REG			68)
   (MASK1_REG			69)
   (MASK2_REG			70)
   (MASK3_REG			71)
   (MASK4_REG			72)
   (MASK5_REG			73)
   (MASK6_REG			74)
   (MASK7_REG			75)
   (FIRST_PSEUDO_REG		76)
  ])

;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.c.

;; In C guard expressions, put expressions which may be compile-time
;; constants first.  This allows for better optimization.  For
;; example, write "TARGET_64BIT && reload_completed", not
;; "reload_completed && TARGET_64BIT".


;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
		    atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
		    bdver4,btver2,znver1,znver2"
  (const (symbol_ref "ix86_schedule")))

;; A basic instruction type.  Refinements due to arguments to be
;; provided in other attributes.
(define_attr "type"
  "other,multi,
   alu,alu1,negnot,imov,imovx,lea,
   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
   imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
   push,pop,call,callv,leave,
   str,bitmanip,
   fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
   fxch,fistp,fisttp,frndint,
   sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
   ssemul,sseimul,ssediv,sselog,sselog1,
   sseishft,sseishft1,ssecmp,ssecomi,
   ssecvt,ssecvt1,sseicvt,sseins,
   sseshuf,sseshuf1,ssemuladd,sse4arg,
   lwp,mskmov,msklog,
   mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
  (const_string "other"))

;; Main data type used by the insn
(define_attr "mode"
  "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF,
  V2DF,V2SF,V1DF,V8DF"
  (const_string "unknown"))

;; The CPU unit operations uses.
(define_attr "unit" "integer,i387,sse,mmx,unknown"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
			  fxch,fistp,fisttp,frndint")
	   (const_string "i387")
	 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
			  ssemul,sseimul,ssediv,sselog,sselog1,
			  sseishft,sseishft1,ssecmp,ssecomi,
			  ssecvt,ssecvt1,sseicvt,sseins,
			  sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
	   (const_string "sse")
	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
	   (const_string "mmx")
	 (eq_attr "type" "other")
	   (const_string "unknown")]
	 (const_string "integer")))

;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
  (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
			  bitmanip,imulx,msklog,mskmov")
	   (const_int 0)
	 (eq_attr "unit" "i387,sse,mmx")
	   (const_int 0)
	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
			  rotate,rotatex,rotate1,imul,icmp,push,pop")
	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
	 (eq_attr "type" "imov,test")
	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
	 (eq_attr "type" "call")
	   (if_then_else (match_operand 0 "constant_call_address_operand")
	     (const_int 4)
	     (const_int 0))
	 (eq_attr "type" "callv")
	   (if_then_else (match_operand 1 "constant_call_address_operand")
	     (const_int 4)
	     (const_int 0))
	 ;; We don't know the size before shorten_branches.  Expect
	 ;; the instruction to fit for better scheduling.
	 (eq_attr "type" "ibr")
	   (const_int 1)
	 ]
	 (symbol_ref "/* Update immediate_length and other attributes! */
		      gcc_unreachable (),1")))

;; The (bounding maximum) length of an instruction address.
(define_attr "length_address" ""
  (cond [(eq_attr "type" "str,other,multi,fxch")
	   (const_int 0)
	 (and (eq_attr "type" "call")
	      (match_operand 0 "constant_call_address_operand"))
	     (const_int 0)
	 (and (eq_attr "type" "callv")
	      (match_operand 1 "constant_call_address_operand"))
	     (const_int 0)
	 ]
	 (symbol_ref "ix86_attr_length_address_default (insn)")))

;; Set when length prefix is used.
(define_attr "prefix_data16" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
	   (const_int 0)
	 (eq_attr "mode" "HI")
	   (const_int 1)
	 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
	   (const_int 1)
	]
	(const_int 0)))

;; Set when string REP prefix is used.
(define_attr "prefix_rep" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
	   (const_int 0)
	 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
	   (const_int 1)
	]
	(const_int 0)))

;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
  (if_then_else
    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
	 (eq_attr "unit" "sse,mmx"))
    (const_int 1)
    (const_int 0)))

;; Set when REX opcode prefix is used.
(define_attr "prefix_rex" ""
  (cond [(not (match_test "TARGET_64BIT"))
	   (const_int 0)
	 (and (eq_attr "mode" "DI")
	      (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
		   (eq_attr "unit" "!mmx")))
	   (const_int 1)
	 (and (eq_attr "mode" "QI")
	      (match_test "x86_extended_QIreg_mentioned_p (insn)"))
	   (const_int 1)
	 (match_test "x86_extended_reg_mentioned_p (insn)")
	   (const_int 1)
	 (and (eq_attr "type" "imovx")
	      (match_operand:QI 1 "ext_QIreg_operand"))
	   (const_int 1)
	]
	(const_int 0)))

;; There are also additional prefixes in 3DNOW, SSSE3.
;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
(define_attr "prefix_extra" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg")
	   (const_int 2)
	 (eq_attr "type" "sseiadd1,ssecvt1")
	   (const_int 1)
	]
	(const_int 0)))

;; Prefix used: original, VEX or maybe VEX.
(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
  (cond [(eq_attr "mode" "OI,V8SF,V4DF")
           (const_string "vex")
         (eq_attr "mode" "XI,V16SF,V8DF")
           (const_string "evex")
        ]
        (const_string "orig")))

;; VEX W bit is used.
(define_attr "prefix_vex_w" "" (const_int 0))

;; The length of VEX prefix
;; Only instructions with 0f prefix can have 2 byte VEX prefix,
;; 0f38/0f3a prefixes can't.  In i386.md 0f3[8a] is
;; still prefix_0f 1, with prefix_extra 1.
(define_attr "length_vex" ""
  (if_then_else (and (eq_attr "prefix_0f" "1")
		     (eq_attr "prefix_extra" "0"))
    (if_then_else (eq_attr "prefix_vex_w" "1")
      (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
      (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
    (if_then_else (eq_attr "prefix_vex_w" "1")
      (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
      (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))

;; 4-bytes evex prefix and 1 byte opcode.
(define_attr "length_evex" "" (const_int 5))

;; Set when modrm byte is used.
(define_attr "modrm" ""
  (cond [(eq_attr "type" "str,leave")
	   (const_int 0)
	 (eq_attr "unit" "i387")
	   (const_int 0)
         (and (eq_attr "type" "incdec")
	      (and (not (match_test "TARGET_64BIT"))
		   (ior (match_operand:SI 1 "register_operand")
			(match_operand:HI 1 "register_operand"))))
	   (const_int 0)
	 (and (eq_attr "type" "push")
	      (not (match_operand 1 "memory_operand")))
	   (const_int 0)
	 (and (eq_attr "type" "pop")
	      (not (match_operand 0 "memory_operand")))
	   (const_int 0)
	 (and (eq_attr "type" "imov")
	      (and (not (eq_attr "mode" "DI"))
		   (ior (and (match_operand 0 "register_operand")
			     (match_operand 1 "immediate_operand"))
		        (ior (and (match_operand 0 "ax_reg_operand")
				  (match_operand 1 "memory_displacement_only_operand"))
			     (and (match_operand 0 "memory_displacement_only_operand")
				  (match_operand 1 "ax_reg_operand"))))))
	   (const_int 0)
	 (and (eq_attr "type" "call")
	      (match_operand 0 "constant_call_address_operand"))
	     (const_int 0)
	 (and (eq_attr "type" "callv")
	      (match_operand 1 "constant_call_address_operand"))
	     (const_int 0)
	 (and (eq_attr "type" "alu,alu1,icmp,test")
	      (match_operand 0 "ax_reg_operand"))
	     (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
	 ]
	 (const_int 1)))

;; The (bounding maximum) length of an instruction in bytes.
;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
;; Later we may want to split them and compute proper length as for
;; other insns.
(define_attr "length" ""
  (cond [(eq_attr "type" "other,multi,fistp,frndint")
	   (const_int 16)
	 (eq_attr "type" "fcmp")
	   (const_int 4)
	 (eq_attr "unit" "i387")
	   (plus (const_int 2)
		 (plus (attr "prefix_data16")
		       (attr "length_address")))
	 (ior (eq_attr "prefix" "evex")
	      (and (ior (eq_attr "prefix" "maybe_evex")
			(eq_attr "prefix" "maybe_vex"))
		   (match_test "TARGET_AVX512F")))
	   (plus (attr "length_evex")
		 (plus (attr "length_immediate")
		       (plus (attr "modrm")
			     (attr "length_address"))))
	 (ior (eq_attr "prefix" "vex")
	      (and (ior (eq_attr "prefix" "maybe_vex")
			(eq_attr "prefix" "maybe_evex"))
		   (match_test "TARGET_AVX")))
	   (plus (attr "length_vex")
		 (plus (attr "length_immediate")
		       (plus (attr "modrm")
			     (attr "length_address"))))]
	 (plus (plus (attr "modrm")
		     (plus (attr "prefix_0f")
			   (plus (attr "prefix_rex")
				 (plus (attr "prefix_extra")
				       (const_int 1)))))
	       (plus (attr "prefix_rep")
		     (plus (attr "prefix_data16")
			   (plus (attr "length_immediate")
				 (attr "length_address")))))))

;; The `memory' attribute is `none' if no memory is referenced, `load' or
;; `store' if there is a simple memory reference therein, or `unknown'
;; if the instruction is complex.

(define_attr "memory" "none,load,store,both,unknown"
  (cond [(eq_attr "type" "other,multi,str,lwp")
	   (const_string "unknown")
	 (eq_attr "type" "lea,fcmov,fpspc")
	   (const_string "none")
	 (eq_attr "type" "fistp,leave")
	   (const_string "both")
	 (eq_attr "type" "frndint")
	   (const_string "load")
	 (eq_attr "type" "push")
	   (if_then_else (match_operand 1 "memory_operand")
	     (const_string "both")
	     (const_string "store"))
	 (eq_attr "type" "pop")
	   (if_then_else (match_operand 0 "memory_operand")
	     (const_string "both")
	     (const_string "load"))
	 (eq_attr "type" "setcc")
	   (if_then_else (match_operand 0 "memory_operand")
	     (const_string "store")
	     (const_string "none"))
	 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
	   (if_then_else (ior (match_operand 0 "memory_operand")
			      (match_operand 1 "memory_operand"))
	     (const_string "load")
	     (const_string "none"))
	 (eq_attr "type" "ibr")
	   (if_then_else (match_operand 0 "memory_operand")
	     (const_string "load")
	     (const_string "none"))
	 (eq_attr "type" "call")
	   (if_then_else (match_operand 0 "constant_call_address_operand")
	     (const_string "none")
	     (const_string "load"))
	 (eq_attr "type" "callv")
	   (if_then_else (match_operand 1 "constant_call_address_operand")
	     (const_string "none")
	     (const_string "load"))
	 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
	      (match_operand 1 "memory_operand"))
	   (const_string "both")
	 (and (match_operand 0 "memory_operand")
	      (match_operand 1 "memory_operand"))
	   (const_string "both")
	 (match_operand 0 "memory_operand")
	   (const_string "store")
	 (match_operand 1 "memory_operand")
	   (const_string "load")
	 (and (eq_attr "type"
		 "!alu1,negnot,ishift1,rotate1,
		   imov,imovx,icmp,test,bitmanip,
		   fmov,fcmp,fsgn,
		   sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
		   sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
		   mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
	      (match_operand 2 "memory_operand"))
	   (const_string "load")
	 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
	      (match_operand 3 "memory_operand"))
	   (const_string "load")
	]
	(const_string "none")))

;; Indicates if an instruction has both an immediate and a displacement.

(define_attr "imm_disp" "false,true,unknown"
  (cond [(eq_attr "type" "other,multi")
	   (const_string "unknown")
	 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
	      (and (match_operand 0 "memory_displacement_operand")
		   (match_operand 1 "immediate_operand")))
	   (const_string "true")
	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
	      (and (match_operand 0 "memory_displacement_operand")
		   (match_operand 2 "immediate_operand")))
	   (const_string "true")
	]
	(const_string "false")))

;; Indicates if an FP operation has an integer source.

(define_attr "fp_int_src" "false,true"
  (const_string "false"))

;; Defines rounding mode of an FP operation.

(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
  (const_string "any"))

;; Define attribute to indicate AVX insns with partial XMM register update.
(define_attr "avx_partial_xmm_update" "false,true"
  (const_string "false"))

;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))

;; Define attribute to indicate unaligned ssemov insns
(define_attr "movu" "0,1" (const_string "0"))

;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
		    sse_noavx,sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
		    avx512bw,noavx512bw,avx512dq,noavx512dq,
		    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
  (const_string "base"))

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
  (const_string "base"))

(define_attr "enabled" ""
  (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
	 (eq_attr "isa" "x64_sse2")
	   (symbol_ref "TARGET_64BIT && TARGET_SSE2")
	 (eq_attr "isa" "x64_sse4")
	   (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
	 (eq_attr "isa" "x64_sse4_noavx")
	   (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
	 (eq_attr "isa" "x64_avx")
	   (symbol_ref "TARGET_64BIT && TARGET_AVX")
	 (eq_attr "isa" "x64_avx512dq")
	   (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
	 (eq_attr "isa" "x64_avx512bw")
	   (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
	 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
	 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
	 (eq_attr "isa" "sse_noavx")
	   (symbol_ref "TARGET_SSE && !TARGET_AVX")
	 (eq_attr "isa" "sse2_noavx")
	   (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
	 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
	 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
	 (eq_attr "isa" "sse4_noavx")
	   (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
	 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
	 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
	 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
	 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
	 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
	 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
	 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
	 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
	 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
	 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
	 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
	 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
	 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
	 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
	 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")

	 (eq_attr "mmx_isa" "native")
	   (symbol_ref "!TARGET_MMX_WITH_SSE")
	 (eq_attr "mmx_isa" "sse")
	   (symbol_ref "TARGET_MMX_WITH_SSE")
	 (eq_attr "mmx_isa" "sse_noavx")
	   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
	 (eq_attr "mmx_isa" "avx")
	   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
	]
	(const_int 1)))

(define_attr "preferred_for_size" "" (const_int 1))
(define_attr "preferred_for_speed" "" (const_int 1))

;; Describe a user's asm statement.
(define_asm_attributes
  [(set_attr "length" "128")
   (set_attr "type" "multi")])

(define_code_iterator plusminus [plus minus])

(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])

(define_code_iterator multdiv [mult div])

;; Base name for define_insn
(define_code_attr plusminus_insn
  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
   (minus "sub") (ss_minus "sssub") (us_minus "ussub")])

;; Base name for insn mnemonic.
(define_code_attr plusminus_mnemonic
  [(plus "add") (ss_plus "adds") (us_plus "addus")
   (minus "sub") (ss_minus "subs") (us_minus "subus")])
(define_code_attr multdiv_mnemonic
  [(mult "mul") (div "div")])

;; Mark commutative operators as such in constraints.
(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
			(minus "") (ss_minus "") (us_minus "")])

;; Mapping of max and min
(define_code_iterator maxmin [smax smin umax umin])

;; Mapping of signed max and min
(define_code_iterator smaxmin [smax smin])

;; Mapping of unsigned max and min
(define_code_iterator umaxmin [umax umin])

;; Base name for integer and FP insn mnemonic
(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
			      (umax "maxu") (umin "minu")])
(define_code_attr maxmin_float [(smax "max") (smin "min")])

(define_int_iterator IEEE_MAXMIN
	[UNSPEC_IEEE_MAX
	 UNSPEC_IEEE_MIN])

(define_int_attr ieee_maxmin
	[(UNSPEC_IEEE_MAX "max")
	 (UNSPEC_IEEE_MIN "min")])

;; Mapping of logic operators
(define_code_iterator any_logic [and ior xor])
(define_code_iterator any_or [ior xor])
(define_code_iterator fpint_logic [and xor])

;; Base name for insn mnemonic.
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])

;; Mapping of logic-shift operators
(define_code_iterator any_lshift [ashift lshiftrt])

;; Mapping of shift-right operators
(define_code_iterator any_shiftrt [lshiftrt ashiftrt])

;; Mapping of all shift operators
(define_code_iterator any_shift [ashift lshiftrt ashiftrt])

;; Base name for define_insn
(define_code_attr shift_insn
  [(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")])

;; Base name for insn mnemonic.
(define_code_attr shift [(ashift "sll") (lshiftrt "shr") (ashiftrt "sar")])
(define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])

;; Mapping of rotate operators
(define_code_iterator any_rotate [rotate rotatert])

;; Base name for define_insn
(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])

;; Base name for insn mnemonic.
(define_code_attr rotate [(rotate "rol") (rotatert "ror")])

;; Mapping of abs neg operators
(define_code_iterator absneg [abs neg])

;; Base name for x87 insn mnemonic.
(define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])

;; Used in signed and unsigned widening multiplications.
(define_code_iterator any_extend [sign_extend zero_extend])

;; Prefix for insn menmonic.
(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
			     (div "i") (udiv "")])
;; Prefix for define_insn
(define_code_attr s [(sign_extend "s") (zero_extend "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")
		     (div "") (udiv "u")])
(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
			  (div "false") (udiv "true")])

;; Used in signed and unsigned truncations.
(define_code_iterator any_truncate [ss_truncate truncate us_truncate])
;; Instruction suffix for truncations.
(define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate "us")])

;; Used in signed and unsigned fix.
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])

;; Used in signed and unsigned float.
(define_code_iterator any_float [float unsigned_float])
(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
(define_code_attr floatprefix [(float "s") (unsigned_float "u")])

;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])

;; All integer modes without QImode.
(define_mode_iterator SWI248x [HI SI DI])

;; All integer modes without QImode and HImode.
(define_mode_iterator SWI48x [SI DI])

;; All integer modes without SImode and DImode.
(define_mode_iterator SWI12 [QI HI])

;; All integer modes without DImode.
(define_mode_iterator SWI124 [QI HI SI])

;; All integer modes without QImode and DImode.
(define_mode_iterator SWI24 [HI SI])

;; Single word integer modes.
(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])

;; Single word integer modes without QImode.
(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])

;; Single word integer modes without QImode and HImode.
(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])

;; All math-dependant single and double word integer modes.
(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
			     (HI "TARGET_HIMODE_MATH")
			     SI DI (TI "TARGET_64BIT")])

;; Math-dependant single word integer modes.
(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
			    (HI "TARGET_HIMODE_MATH")
			    SI (DI "TARGET_64BIT")])

;; Math-dependant integer modes without DImode.
(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
			       (HI "TARGET_HIMODE_MATH")
			       SI])

;; Math-dependant integer modes with DImode (enabled for 32bit with STV).
(define_mode_iterator SWIM1248s
	[(QI "TARGET_QIMODE_MATH")
	 (HI "TARGET_HIMODE_MATH")
	 SI (DI "TARGET_64BIT || (TARGET_STV && TARGET_SSE2)")])

;; Math-dependant single word integer modes without QImode.
(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
		      	       SI (DI "TARGET_64BIT")])

;; Double word integer modes.
(define_mode_iterator DWI [(DI "!TARGET_64BIT")
			   (TI "TARGET_64BIT")])

;; SWI and DWI together.
(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])

;; GET_MODE_SIZE for selected modes.  As GET_MODE_SIZE is not
;; compile time constant, it is faster to use <MODE_SIZE> than
;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on
;; command line options just use GET_MODE_SIZE macro.
(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16")
			     (SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)")
			     (V16QI "16") (V32QI "32") (V64QI "64")
			     (V8HI "16") (V16HI "32") (V32HI "64")
			     (V4SI "16") (V8SI "32") (V16SI "64")
			     (V2DI "16") (V4DI "32") (V8DI "64")
			     (V1TI "16") (V2TI "32") (V4TI "64")
			     (V2DF "16") (V4DF "32") (V8DF "64")
			     (V4SF "16") (V8SF "32") (V16SF "64")])

;; Double word integer modes as mode attribute.
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])

;; LEA mode corresponding to an integer mode
(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])

;; Half mode for double word integer modes.
(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
			    (DI "TARGET_64BIT")])

;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])

;; Instruction suffix for masks.
(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])

;; Pointer size prefix for integer modes (Intel asm dialect)
(define_mode_attr iptrsize [(QI "BYTE")
			    (HI "WORD")
			    (SI "DWORD")
			    (DI "QWORD")])

;; Register class for integer modes.
(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])

;; Immediate operand constraint for integer modes.
(define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])

;; General operand constraint for word modes.
(define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])

;; Immediate operand constraint for double integer modes.
(define_mode_attr di [(SI "nF") (DI "Wd")])

;; Immediate operand constraint for shifts.
(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])

;; Print register name in the specified mode.
(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])

;; General operand predicate for integer modes.
(define_mode_attr general_operand
	[(QI "general_operand")
	 (HI "general_operand")
	 (SI "x86_64_general_operand")
	 (DI "x86_64_general_operand")
	 (TI "x86_64_general_operand")])

;; General operand predicate for integer modes, where for TImode
;; we need both words of the operand to be general operands.
(define_mode_attr general_hilo_operand
	[(QI "general_operand")
	 (HI "general_operand")
	 (SI "x86_64_general_operand")
	 (DI "x86_64_general_operand")
	 (TI "x86_64_hilo_general_operand")])

;; General sign extend operand predicate for integer modes,
;; which disallows VOIDmode operands and thus it is suitable
;; for use inside sign_extend.
(define_mode_attr general_sext_operand
	[(QI "sext_operand")
	 (HI "sext_operand")
	 (SI "x86_64_sext_operand")
	 (DI "x86_64_sext_operand")])

;; General sign/zero extend operand predicate for integer modes.
(define_mode_attr general_szext_operand
	[(QI "general_operand")
	 (HI "general_operand")
	 (SI "x86_64_szext_general_operand")
	 (DI "x86_64_szext_general_operand")])

(define_mode_attr nonmemory_szext_operand
	[(QI "nonmemory_operand")
	 (HI "nonmemory_operand")
	 (SI "x86_64_szext_nonmemory_operand")
	 (DI "x86_64_szext_nonmemory_operand")])

;; Immediate operand predicate for integer modes.
(define_mode_attr immediate_operand
	[(QI "immediate_operand")
	 (HI "immediate_operand")
	 (SI "x86_64_immediate_operand")
	 (DI "x86_64_immediate_operand")])

;; Nonmemory operand predicate for integer modes.
(define_mode_attr nonmemory_operand
	[(QI "nonmemory_operand")
	 (HI "nonmemory_operand")
	 (SI "x86_64_nonmemory_operand")
	 (DI "x86_64_nonmemory_operand")])

;; Operand predicate for shifts.
(define_mode_attr shift_operand
	[(QI "nonimmediate_operand")
	 (HI "nonimmediate_operand")
	 (SI "nonimmediate_operand")
	 (DI "shiftdi_operand")
	 (TI "register_operand")])

;; Operand predicate for shift argument.
(define_mode_attr shift_immediate_operand
	[(QI "const_1_to_31_operand")
	 (HI "const_1_to_31_operand")
	 (SI "const_1_to_31_operand")
	 (DI "const_1_to_63_operand")])

;; Input operand predicate for arithmetic left shifts.
(define_mode_attr ashl_input_operand
	[(QI "nonimmediate_operand")
	 (HI "nonimmediate_operand")
	 (SI "nonimmediate_operand")
	 (DI "ashldi_input_operand")
	 (TI "reg_or_pm1_operand")])

;; SSE and x87 SFmode and DFmode floating point modes
(define_mode_iterator MODEF [SF DF])

;; All x87 floating point modes
(define_mode_iterator X87MODEF [SF DF XF])

;; All SSE floating point modes
(define_mode_iterator SSEMODEF [SF DF TF])
(define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")])

;; SSE instruction suffix for various modes
(define_mode_attr ssemodesuffix
  [(SF "ss") (DF "sd")
   (V16SF "ps") (V8DF "pd")
   (V8SF "ps") (V4DF "pd")
   (V4SF "ps") (V2DF "pd")
   (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
   (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
   (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])

;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])

;; SSE vector mode corresponding to a scalar mode
(define_mode_attr ssevecmode
  [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
(define_mode_attr ssevecmodelower
  [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])

;; AVX512F vector mode corresponding to a scalar mode
(define_mode_attr avx512fvecmode
  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])

;; Instruction suffix for REX 64bit operators.
(define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
(define_mode_attr rex64namesuffix [(SI "") (DI "q")])

;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities.  Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])

;; This mode iterator allows :W to be used for patterns that operate on
;; word_mode sized quantities.
(define_mode_iterator W
  [(SI "word_mode == SImode") (DI "word_mode == DImode")])

;; This mode iterator allows :PTR to be used for patterns that operate on
;; ptr_mode sized quantities.
(define_mode_iterator PTR
  [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])

;; Scheduling descriptions

(include "pentium.md")
(include "ppro.md")
(include "k6.md")
(include "athlon.md")
(include "bdver1.md")
(include "bdver3.md")
(include "btver2.md")
(include "znver1.md")
(include "geode.md")
(include "atom.md")
(include "slm.md")
(include "glm.md")
(include "core2.md")
(include "haswell.md")


;; Operand and operator predicates and constraints

(include "predicates.md")
(include "constraints.md")


;; Compare and branch/compare and store instructions.

(define_expand "cbranch<mode>4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:SDWIM 1 "nonimmediate_operand")
		    (match_operand:SDWIM 2 "<general_operand>")))
   (set (pc) (if_then_else
	       (match_operator 0 "ordered_comparison_operator"
		[(reg:CC FLAGS_REG) (const_int 0)])
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_expand_branch (GET_CODE (operands[0]),
		      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstore<mode>4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:SWIM 2 "nonimmediate_operand")
		    (match_operand:SWIM 3 "<general_operand>")))
   (set (match_operand:QI 0 "register_operand")
	(match_operator 1 "ordered_comparison_operator"
	  [(reg:CC FLAGS_REG) (const_int 0)]))]
  ""
{
  if (MEM_P (operands[2]) && MEM_P (operands[3]))
    operands[2] = force_reg (<MODE>mode, operands[2]);
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
		     operands[2], operands[3]);
  DONE;
})

(define_expand "@cmp<mode>_1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
		    (match_operand:SWI48 1 "<general_operand>")))])

(define_mode_iterator SWI1248_AVX512BWDQ_64
  [(QI "TARGET_AVX512DQ") HI
   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])

(define_insn "*cmp<mode>_ccz_1"
  [(set (reg FLAGS_REG)
	(compare (match_operand:SWI1248_AVX512BWDQ_64 0
			"nonimmediate_operand" "<r>,?m<r>,$k")
		 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
  "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
  "@
   test{<imodesuffix>}\t%0, %0
   cmp{<imodesuffix>}\t{%1, %0|%0, %1}
   kortest<mskmodesuffix>\t%0, %0"
  [(set_attr "type" "test,icmp,msklog")
   (set_attr "length_immediate" "0,1,*")
   (set_attr "prefix" "*,*,vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_ccno_1"
  [(set (reg FLAGS_REG)
	(compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
		 (match_operand:SWI 1 "const0_operand")))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "@
   test{<imodesuffix>}\t%0, %0
   cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test,icmp")
   (set_attr "length_immediate" "0,1")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_1"
  [(set (reg FLAGS_REG)
	(compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
		 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m")))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_minus_1"
  [(set (reg FLAGS_REG)
	(compare
	  (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
		     (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
	  (const_int 0)))]
  "ix86_match_ccmode (insn, CCGOCmode)"
  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmpqi_ext_1"
  [(set (reg FLAGS_REG)
	(compare
	  (match_operand:QI 0 "nonimmediate_operand" "QBc,m")
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 1 "ext_register_operand" "Q,Q")
	      (const_int 8)
	      (const_int 8)) 0)))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%h1, %0|%0, %h1}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "icmp")
   (set_attr "mode" "QI")])

(define_insn "*cmpqi_ext_2"
  [(set (reg FLAGS_REG)
	(compare
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 0 "ext_register_operand" "Q")
	      (const_int 8)
	      (const_int 8)) 0)
	  (match_operand:QI 1 "const0_operand")))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t%h0, %h0"
  [(set_attr "type" "test")
   (set_attr "length_immediate" "0")
   (set_attr "mode" "QI")])

(define_expand "cmpqi_ext_3"
  [(set (reg:CC FLAGS_REG)
	(compare:CC
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 0 "ext_register_operand")
	      (const_int 8)
	      (const_int 8)) 0)
	  (match_operand:QI 1 "const_int_operand")))])

(define_insn "*cmpqi_ext_3"
  [(set (reg FLAGS_REG)
	(compare
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 0 "ext_register_operand" "Q,Q")
	      (const_int 8)
	      (const_int 8)) 0)
	  (match_operand:QI 1 "general_operand" "QnBc,m")))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "icmp")
   (set_attr "mode" "QI")])

(define_insn "*cmpqi_ext_4"
  [(set (reg FLAGS_REG)
	(compare
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 0 "ext_register_operand" "Q")
	      (const_int 8)
	      (const_int 8)) 0)
	  (subreg:QI
	    (zero_extract:SI
	      (match_operand 1 "ext_register_operand" "Q")
	      (const_int 8)
	      (const_int 8)) 0)))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "QI")])

;; These implement float point compares.
;; %%% See if we can get away with VOIDmode operands on the actual insns,
;; which would allow mix and match FP modes on the compares.  Which is what
;; the old patterns did, but with many more of them.

(define_expand "cbranchxf4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:XF 1 "nonmemory_operand")
		    (match_operand:XF 2 "nonmemory_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_80387"
{
  ix86_expand_branch (GET_CODE (operands[0]),
		      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstorexf4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:XF 2 "nonmemory_operand")
		    (match_operand:XF 3 "nonmemory_operand")))
   (set (match_operand:QI 0 "register_operand")
              (match_operator 1 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)]))]
  "TARGET_80387"
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
		     operands[2], operands[3]);
  DONE;
})

(define_expand "cbranch<mode>4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
		    (match_operand:MODEF 2 "cmp_fp_expander_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  ix86_expand_branch (GET_CODE (operands[0]),
		      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstore<mode>4"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
		    (match_operand:MODEF 3 "cmp_fp_expander_operand")))
   (set (match_operand:QI 0 "register_operand")
              (match_operator 1 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)]))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
		     operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchcc4"
  [(set (pc) (if_then_else
              (match_operator 0 "comparison_operator"
               [(match_operand 1 "flags_reg_operand")
                (match_operand 2 "const0_operand")])
              (label_ref (match_operand 3))
              (pc)))]
  ""
{
  ix86_expand_branch (GET_CODE (operands[0]),
		      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstorecc4"
  [(set (match_operand:QI 0 "register_operand")
              (match_operator 1 "comparison_operator"
               [(match_operand 2 "flags_reg_operand")
                (match_operand 3 "const0_operand")]))]
  ""
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
		     operands[2], operands[3]);
  DONE;
})

;; FP compares, step 1:
;; Set the FP condition codes and move fpsr to ax.

;; We may not use "#" to split and emit these
;; due to reg-stack pops killing fpsr.

(define_insn "*cmpxf_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI
	  [(compare:CCFP
	     (match_operand:XF 1 "register_operand" "f")
	     (match_operand:XF 2 "reg_or_0_operand" "fC"))]
	  UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "XF")])

(define_insn "*cmp<mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI
	  [(compare:CCFP
	     (match_operand:MODEF 1 "register_operand" "f")
	     (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
	  UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI
	  [(compare:CCFP
	     (match_operand:X87MODEF 1 "register_operand" "f")
	     (float:X87MODEF
	       (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
	  UNSPEC_FNSTSW))]
  "TARGET_80387
   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
       || optimize_function_for_size_p (cfun))"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "fp_int_src" "true")
   (set_attr "mode" "<SWI24:MODE>")])

(define_insn "*cmpu<mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI
	  [(unspec:CCFP
	     [(compare:CCFP
		(match_operand:X87MODEF 1 "register_operand" "f")
		(match_operand:X87MODEF 2 "register_operand" "f"))]
	     UNSPEC_NOTRAP)]
	  UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, true);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "<MODE>")])

;; FP compares, step 2:
;; Get ax into flags, general case.

(define_insn "x86_sahf_1"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:HI 0 "register_operand" "a")]
		   UNSPEC_SAHF))]
  "TARGET_SAHF"
{
#ifndef HAVE_AS_IX86_SAHF
  if (TARGET_64BIT)
    return ASM_BYTE "0x9e";
  else
#endif
  return "sahf";
}
  [(set_attr "length" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;; Pentium Pro can do both steps in one go.
;; (these instructions set flags directly)

(define_subst_attr "unord" "unord_subst" "" "u")
(define_subst_attr "unordered" "unord_subst" "false" "true")

(define_subst "unord_subst"
  [(set (match_operand:CCFP 0)
        (match_operand:CCFP 1))]
  ""
  [(set (match_dup 0)
        (unspec:CCFP
	  [(match_dup 1)]
	  UNSPEC_NOTRAP))])

(define_insn "*cmpi<unord>xf_i387"
  [(set (reg:CCFP FLAGS_REG)
	(compare:CCFP
	  (match_operand:XF 0 "register_operand" "f")
	  (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387 && TARGET_CMOVE"
  "* return output_fp_compare (insn, operands, true, <unordered>);"
  [(set_attr "type" "fcmp")
   (set_attr "mode" "XF")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "znver1_decode" "double")])

(define_insn "*cmpi<unord><MODEF:mode>"
  [(set (reg:CCFP FLAGS_REG)
	(compare:CCFP
	  (match_operand:MODEF 0 "register_operand" "f,v")
	  (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
  "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
   || (TARGET_80387 && TARGET_CMOVE)"
  "@
   * return output_fp_compare (insn, operands, true, <unordered>);
   %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "fcmp,ssecomi")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "prefix_rep" "*,0")
   (set (attr "prefix_data16")
	(cond [(eq_attr "alternative" "0")
		 (const_string "*")
	       (eq_attr "mode" "DF")
		 (const_string "1")
	      ]
	      (const_string "0")))
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "znver1_decode" "double")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "TARGET_MIX_SSE_I387")
	 (symbol_ref "true"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "true")
	 (symbol_ref "false"))))])

;; Push/pop instructions.

(define_insn "*push<mode>2"
  [(set (match_operand:DWI 0 "push_operand" "=<")
	(match_operand:DWI 1 "general_no_elim_operand" "riF*o"))]
  ""
  "#"
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_split
  [(set (match_operand:DWI 0 "push_operand")
        (match_operand:DWI 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

(define_insn "*pushdi2_rex64"
  [(set (match_operand:DI 0 "push_operand" "=<,!<")
	(match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
  "TARGET_64BIT"
  "@
   push{q}\t%1
   #"
  [(set_attr "type" "push,multi")
   (set_attr "mode" "DI")])

;; Convert impossible pushes of immediate to existing instructions.
;; First try to get scratch register and go through it.  In case this
;; fails, push sign extended lower part first and then overwrite
;; upper part by 32bit move.

(define_peephole2
  [(match_scratch:DI 2 "r")
   (set (match_operand:DI 0 "push_operand")
        (match_operand:DI 1 "immediate_operand"))]
  "TARGET_64BIT
   && !symbolic_operand (operands[1], DImode)
   && !x86_64_immediate_operand (operands[1], DImode)"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

(define_split
  [(set (match_operand:DI 0 "push_operand")
        (match_operand:DI 1 "immediate_operand"))]
  "TARGET_64BIT && epilogue_completed
   && !symbolic_operand (operands[1], DImode)
   && !x86_64_immediate_operand (operands[1], DImode)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);

  operands[1] = gen_lowpart (DImode, operands[2]);
  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						   GEN_INT (4)));
})

(define_insn "*pushsi2"
  [(set (match_operand:SI 0 "push_operand" "=<")
	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
  "!TARGET_64BIT"
  "push{l}\t%1"
  [(set_attr "type" "push")
   (set_attr "mode" "SI")])

;; emit_push_insn when it calls move_by_pieces requires an insn to
;; "push a byte/word".  But actually we use pushl, which has the effect
;; of rounding the amount pushed up to a word.

;; For TARGET_64BIT we always round up to 8 bytes.
(define_insn "*push<mode>2_rex64"
  [(set (match_operand:SWI124 0 "push_operand" "=X")
	(match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))]
  "TARGET_64BIT"
  "push{q}\t%q1"
  [(set_attr "type" "push")
   (set_attr "mode" "DI")])

(define_insn "*push<mode>2"
  [(set (match_operand:SWI12 0 "push_operand" "=X")
	(match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
  "!TARGET_64BIT"
  "push{l}\t%k1"
  [(set_attr "type" "push")
   (set_attr "mode" "SI")])

(define_insn "*push<mode>2_prologue"
  [(set (match_operand:W 0 "push_operand" "=<")
	(match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
   (clobber (mem:BLK (scratch)))]
  ""
  "push{<imodesuffix>}\t%1"
  [(set_attr "type" "push")
   (set_attr "mode" "<MODE>")])

(define_insn "*pop<mode>1"
  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
	(match_operand:W 1 "pop_operand" ">"))]
  ""
  "pop{<imodesuffix>}\t%0"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])

(define_insn "*pop<mode>1_epilogue"
  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
	(match_operand:W 1 "pop_operand" ">"))
   (clobber (mem:BLK (scratch)))]
  ""
  "pop{<imodesuffix>}\t%0"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])

(define_insn "*pushfl<mode>2"
  [(set (match_operand:W 0 "push_operand" "=<")
	(match_operand:W 1 "flags_reg_operand"))]
  ""
  "pushf{<imodesuffix>}"
  [(set_attr "type" "push")
   (set_attr "mode" "<MODE>")])

(define_insn "*popfl<mode>1"
  [(set (match_operand:W 0 "flags_reg_operand")
	(match_operand:W 1 "pop_operand" ">"))]
  ""
  "popf{<imodesuffix>}"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])


;; Reload patterns to support multi-word load/store
;; with non-offsetable address.
(define_expand "reload_noff_store"
  [(parallel [(match_operand 0 "memory_operand" "=m")
              (match_operand 1 "register_operand" "r")
              (match_operand:DI 2 "register_operand" "=&r")])]
  "TARGET_64BIT"
{
  rtx mem = operands[0];
  rtx addr = XEXP (mem, 0);

  emit_move_insn (operands[2], addr);
  mem = replace_equiv_address_nv (mem, operands[2]);

  emit_insn (gen_rtx_SET (mem, operands[1]));
  DONE;
})

(define_expand "reload_noff_load"
  [(parallel [(match_operand 0 "register_operand" "=r")
              (match_operand 1 "memory_operand" "m")
              (match_operand:DI 2 "register_operand" "=r")])]
  "TARGET_64BIT"
{
  rtx mem = operands[1];
  rtx addr = XEXP (mem, 0);

  emit_move_insn (operands[2], addr);
  mem = replace_equiv_address_nv (mem, operands[2]);

  emit_insn (gen_rtx_SET (operands[0], mem));
  DONE;
})

;; Move instructions.

(define_expand "movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_AVX512F"
  "ix86_expand_vector_move (XImode, operands); DONE;")

(define_expand "movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand")
	(match_operand:OI 1 "general_operand"))]
  "TARGET_AVX"
  "ix86_expand_vector_move (OImode, operands); DONE;")

(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
	(match_operand:TI 1 "general_operand"))]
  "TARGET_64BIT || TARGET_SSE"
{
  if (TARGET_64BIT)
    ix86_expand_move (TImode, operands);
  else
    ix86_expand_vector_move (TImode, operands);
  DONE;
})

;; This expands to what emit_move_complex would generate if we didn't
;; have a movti pattern.  Having this avoids problems with reload on
;; 32-bit targets when SSE is present, but doesn't seem to be harmful
;; to have around all the time.
(define_expand "movcdi"
  [(set (match_operand:CDI 0 "nonimmediate_operand")
	(match_operand:CDI 1 "general_operand"))]
  ""
{
  if (push_operand (operands[0], CDImode))
    emit_move_complex_push (CDImode, operands[0], operands[1]);
  else
    emit_move_complex_parts (operands[0], operands[1]);
  DONE;
})

(define_expand "mov<mode>"
  [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
	(match_operand:SWI1248x 1 "general_operand"))]
  ""
  "ix86_expand_move (<MODE>mode, operands); DONE;")

(define_insn "*mov<mode>_xor"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(match_operand:SWI48 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "xor{l}\t%k0, %k0"
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")
   (set_attr "length_immediate" "0")])

(define_insn "*mov<mode>_or"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(match_operand:SWI48 1 "constm1_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])

(define_insn "*movxi_internal_avx512f"
  [(set (match_operand:XI 0 "nonimmediate_operand"		"=v,v ,v ,m")
	(match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      if (misaligned_operand (operands[0], XImode)
	  || misaligned_operand (operands[1], XImode))
	return "vmovdqu32\t{%1, %0|%0, %1}";
      else
	return "vmovdqa32\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

(define_insn "*movoi_internal_avx"
  [(set (match_operand:OI 0 "nonimmediate_operand"		"=v,v ,v ,m")
	(match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
  "TARGET_AVX
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      if (misaligned_operand (operands[0], OImode)
	  || misaligned_operand (operands[1], OImode))
	{
	  if (get_attr_mode (insn) == MODE_XI)
	    return "vmovdqu32\t{%1, %0|%0, %1}";
	  else
	    return "vmovdqu\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (get_attr_mode (insn) == MODE_XI)
	    return "vmovdqa32\t{%1, %0|%0, %1}";
	  else
	    return "vmovdqa\t{%1, %0|%0, %1}";
	}

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,avx2,*,*")
   (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set (attr "mode")
	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
		    (match_operand 1 "ext_sse_reg_operand"))
		 (const_string "XI")
	       (and (eq_attr "alternative" "1")
		    (match_test "TARGET_AVX512VL"))
		 (const_string "XI")
	      ]
	      (const_string "OI")))])

(define_insn "*movti_internal"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
	(match_operand:TI 1 "general_operand"	   "riFo,re,C,BC,vm,v,Yd,r"))]
  "(TARGET_64BIT
    && !(MEM_P (operands[0]) && MEM_P (operands[1])))
   || (TARGET_SSE
       && nonimmediate_or_sse_const_operand (operands[1], TImode)
       && (register_operand (operands[0], TImode)
	   || register_operand (operands[1], TImode)))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_MULTI:
      return "#";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      /* TDmode values are passed as TImode on the stack.  Moving them
	 to stack may result in unaligned memory access.  */
      if (misaligned_operand (operands[0], TImode)
	  || misaligned_operand (operands[1], TImode))
	{
	  if (get_attr_mode (insn) == MODE_V4SF)
	    return "%vmovups\t{%1, %0|%0, %1}";
	  else if (get_attr_mode (insn) == MODE_XI)
	    return "vmovdqu32\t{%1, %0|%0, %1}";
	  else
	    return "%vmovdqu\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (get_attr_mode (insn) == MODE_V4SF)
	    return "%vmovaps\t{%1, %0|%0, %1}";
	  else if (get_attr_mode (insn) == MODE_XI)
	    return "vmovdqa32\t{%1, %0|%0, %1}";
	  else
	    return "%vmovdqa\t{%1, %0|%0, %1}";
	}

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,6,7")
	      (const_string "x64")
	    (eq_attr "alternative" "3")
	      (const_string "sse2")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,6,7")
	      (const_string "multi")
	    (eq_attr "alternative" "2,3")
	      (const_string "sselog1")
	   ]
	   (const_string "ssemov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "mode")
	(cond [(eq_attr "alternative" "0,1")
		 (const_string "DI")
	       (ior (match_operand 0 "ext_sse_reg_operand")
		    (match_operand 1 "ext_sse_reg_operand"))
		 (const_string "XI")
	       (and (eq_attr "alternative" "3")
		    (match_test "TARGET_AVX512VL"))
		 (const_string "XI")
	       (match_test "TARGET_AVX")
		 (const_string "TI")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (and (eq_attr "alternative" "5")
		    (match_test "TARGET_SSE_TYPELESS_STORES"))
		 (const_string "V4SF")
	       ]
	       (const_string "TI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "6")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "7")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])

(define_split
  [(set (match_operand:TI 0 "sse_reg_operand")
        (match_operand:TI 1 "general_reg_operand"))]
  "TARGET_64BIT && TARGET_SSE4_1
   && reload_completed"
  [(set (match_dup 2)
  	(vec_merge:V2DI
	  (vec_duplicate:V2DI (match_dup 3))
	  (match_dup 2)
	  (const_int 2)))]
{
  operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
  operands[3] = gen_highpart (DImode, operands[1]);

  emit_move_insn (gen_lowpart (DImode, operands[0]),
  		  gen_lowpart (DImode, operands[1]));
})

(define_insn "*movdi_internal"
  [(set (match_operand:DI 0 "nonimmediate_operand"
    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m,*k")
	(match_operand:DI 1 "general_operand"
    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*km,*k,*k,CBC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_MSKMOV:
      return "kmovq\t{%1, %0|%0, %1}";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
	return "kxorq\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
	return "kxnorq\t%0, %0, %0";
      gcc_unreachable ();

    case TYPE_MULTI:
      return "#";

    case TYPE_MMX:
      return "pxor\t%0, %0";

    case TYPE_MMXMOV:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
	  && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
	return "movd\t{%1, %0|%0, %1}";
      return "movq\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_DI:
	  /* Handle broken assemblers that require movd instead of movq.  */
	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
	    return "%vmovd\t{%1, %0|%0, %1}";
	  return "%vmovq\t{%1, %0|%0, %1}";

	case MODE_TI:
	  /* Handle AVX512 registers set.  */
	  if (EXT_REX_SSE_REG_P (operands[0])
	      || EXT_REX_SSE_REG_P (operands[1]))
	    return "vmovdqa64\t{%1, %0|%0, %1}";
	  return "%vmovdqa\t{%1, %0|%0, %1}";

	case MODE_V2SF:
	  gcc_assert (!TARGET_AVX);
	  return "movlps\t{%1, %0|%0, %1}";
	case MODE_V4SF:
	  return "%vmovaps\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    case TYPE_SSECVT:
      if (SSE_REG_P (operands[0]))
	return "movq2dq\t{%1, %0|%0, %1}";
      else
	return "movdq2q\t{%1, %0|%0, %1}";

    case TYPE_LEA:
      return "lea{q}\t{%E1, %0|%0, %E1}";

    case TYPE_IMOV:
      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
      if (get_attr_mode (insn) == MODE_SI)
	return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else if (which_alternative == 4)
	return "movabs{q}\t{%1, %0|%0, %1}";
      else if (ix86_use_lea_for_mov (insn, operands))
	return "lea{q}\t{%E1, %0|%0, %E1}";
      else
	return "mov{q}\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,17,18")
	      (const_string "nox64")
	    (eq_attr "alternative" "2,3,4,5,10,11,23,25")
	      (const_string "x64")
	    (eq_attr "alternative" "19,20")
	      (const_string "x64_sse2")
	    (eq_attr "alternative" "21,22")
	      (const_string "sse2")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,17,18")
	      (const_string "multi")
	    (eq_attr "alternative" "6")
	      (const_string "mmx")
	    (eq_attr "alternative" "7,8,9,10,11")
	      (const_string "mmxmov")
	    (eq_attr "alternative" "12")
	      (const_string "sselog1")
	    (eq_attr "alternative" "13,14,15,16,19,20")
	      (const_string "ssemov")
	    (eq_attr "alternative" "21,22")
	      (const_string "ssecvt")
	    (eq_attr "alternative" "23,24,25,26")
	      (const_string "mskmov")
	    (eq_attr "alternative" "27")
	      (const_string "msklog")
	    (and (match_operand 0 "register_operand")
		 (match_operand 1 "pic_32bit_operand"))
	      (const_string "lea")
	   ]
	   (const_string "imov")))
   (set (attr "modrm")
     (if_then_else
       (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
       (const_string "0")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
       (const_string "8")
       (const_string "*")))
   (set (attr "prefix_rex")
     (if_then_else
       (eq_attr "alternative" "10,11,19,20")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "2")
	      (const_string "SI")
	    (eq_attr "alternative" "12,13")
	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
			  (match_operand 1 "ext_sse_reg_operand"))
		       (const_string "TI")
		     (match_test "TARGET_AVX")
		       (const_string "TI")
		     (ior (not (match_test "TARGET_SSE2"))
			  (match_test "optimize_function_for_size_p (cfun)"))
		       (const_string "V4SF")
		    ]
		    (const_string "TI"))

	    (and (eq_attr "alternative" "14,15,16")
		 (not (match_test "TARGET_SSE2")))
	      (const_string "V2SF")
	   ]
	   (const_string "DI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "10,17,19")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "11,18,20")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "15")
              (if_then_else
		(match_test "TARGET_STV && TARGET_SSE2")
		(symbol_ref "false")
		(const_string "*"))
	    (eq_attr "alternative" "16")
              (if_then_else
		(match_test "TARGET_STV && TARGET_SSE2")
		(symbol_ref "true")
		(symbol_ref "false"))
	   ]
	   (const_string "*")))])

(define_split
  [(set (match_operand:<DWI> 0 "general_reg_operand")
        (match_operand:<DWI> 1 "sse_reg_operand"))]
  "TARGET_SSE4_1
   && reload_completed"
  [(set (match_dup 2)
  	(vec_select:DWIH
	  (match_dup 3)
	  (parallel [(const_int 1)])))]
{
  operands[2] = gen_highpart (<MODE>mode, operands[0]);
  operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);

  emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
  		  gen_lowpart (<MODE>mode, operands[1]));
})

(define_split
  [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
        (match_operand:DWI 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

(define_split
  [(set (match_operand:DI 0 "sse_reg_operand")
        (match_operand:DI 1 "general_reg_operand"))]
  "!TARGET_64BIT && TARGET_SSE4_1
   && reload_completed"
  [(set (match_dup 2)
  	(vec_merge:V4SI
	  (vec_duplicate:V4SI (match_dup 3))
	  (match_dup 2)
	  (const_int 2)))]
{
  operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
  operands[3] = gen_highpart (SImode, operands[1]);

  emit_move_insn (gen_lowpart (SImode, operands[0]),
  		  gen_lowpart (SImode, operands[1]));
})

;; movabsq $0x0012345678000000, %rax is longer
;; than movl $0x12345678, %eax; shlq $24, %rax.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
	(match_operand:DI 1 "const_int_operand"))]
  "TARGET_64BIT
   && optimize_insn_for_size_p ()
   && LEGACY_INT_REG_P (operands[0])
   && !x86_64_immediate_operand (operands[1], DImode)
   && !x86_64_zext_immediate_operand (operands[1], DImode)
   && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
        & ~(HOST_WIDE_INT) 0xffffffff)
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int shift = ctz_hwi (UINTVAL (operands[1]));
  operands[1] = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
  operands[2] = gen_int_mode (shift, QImode);
})

(define_insn "*movsi_internal"
  [(set (match_operand:SI 0 "nonimmediate_operand"
    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k")
	(match_operand:SI 1 "general_operand"
    "g ,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,*v,r  ,*r,*km,*k ,CBC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_MSKMOV:
      return "kmovd\t{%1, %0|%0, %1}";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
	return "kxord\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
	return "kxnord\t%0, %0, %0";
      gcc_unreachable ();

    case TYPE_SSEMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_SI:
          return "%vmovd\t{%1, %0|%0, %1}";
	case MODE_TI:
	  return "%vmovdqa\t{%1, %0|%0, %1}";
	case MODE_XI:
	  return "vmovdqa32\t{%g1, %g0|%g0, %g1}";

	case MODE_V4SF:
	  return "%vmovaps\t{%1, %0|%0, %1}";

	case MODE_SF:
	  gcc_assert (!TARGET_AVX);
          return "movss\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    case TYPE_MMX:
      return "pxor\t%0, %0";

    case TYPE_MMXMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_DI:
	  return "movq\t{%1, %0|%0, %1}";
	case MODE_SI:
	  return "movd\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    case TYPE_LEA:
      return "lea{l}\t{%E1, %0|%0, %E1}";

    case TYPE_IMOV:
      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
      if (ix86_use_lea_for_mov (insn, operands))
	return "lea{l}\t{%E1, %0|%0, %E1}";
      else
	return "mov{l}\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "12,13")
	      (const_string "sse2")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "2")
	      (const_string "mmx")
	    (eq_attr "alternative" "3,4,5,6,7")
	      (const_string "mmxmov")
	    (eq_attr "alternative" "8")
	      (const_string "sselog1")
	    (eq_attr "alternative" "9,10,11,12,13")
	      (const_string "ssemov")
	    (eq_attr "alternative" "14,15,16")
	      (const_string "mskmov")
	    (eq_attr "alternative" "17")
	      (const_string "msklog")
	    (and (match_operand 0 "register_operand")
		 (match_operand 1 "pic_32bit_operand"))
	      (const_string "lea")
	   ]
	   (const_string "imov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "2,3")
	      (const_string "DI")
	    (eq_attr "alternative" "8,9")
	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
			  (match_operand 1 "ext_sse_reg_operand"))
		       (const_string "XI")
		     (match_test "TARGET_AVX")
		       (const_string "TI")
		     (ior (not (match_test "TARGET_SSE2"))
			  (match_test "optimize_function_for_size_p (cfun)"))
		       (const_string "V4SF")
		    ]
		    (const_string "TI"))

	    (and (eq_attr "alternative" "10,11")
	         (not (match_test "TARGET_SSE2")))
	      (const_string "SF")
	   ]
	   (const_string "SI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "6,12")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "7,13")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])

(define_insn "*movhi_internal"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m,k")
	(match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,r,km,k,k,CBC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      /* movzwl is faster than movw on p2 due to partial word stalls,
	 though not as fast as an aligned movl.  */
      return "movz{wl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_MSKMOV:
      switch (which_alternative)
	{
	case 4:
	  return "kmovw\t{%k1, %0|%0, %k1}";
	case 6:
	  return "kmovw\t{%1, %k0|%k0, %1}";
	case 5:
	case 7:
	  return "kmovw\t{%1, %0|%0, %1}";
	default:
	  gcc_unreachable ();
	}

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
	return "kxorw\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
	return "kxnorw\t%0, %0, %0";
      gcc_unreachable ();

    default:
      if (get_attr_mode (insn) == MODE_SI)
	return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else
	return "mov{w}\t{%1, %0|%0, %1}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "4,5,6,7")
	      (const_string "mskmov")
	    (eq_attr "alternative" "8")
	      (const_string "msklog")
	    (match_test "optimize_function_for_size_p (cfun)")
	      (const_string "imov")
	    (and (eq_attr "alternative" "0")
		 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
		      (not (match_test "TARGET_HIMODE_MATH"))))
	      (const_string "imov")
	    (and (eq_attr "alternative" "1,2")
		 (match_operand:HI 1 "aligned_operand"))
	      (const_string "imov")
	    (and (match_test "TARGET_MOVX")
		 (eq_attr "alternative" "0,2"))
	      (const_string "imovx")
	   ]
	   (const_string "imov")))
    (set (attr "prefix")
      (if_then_else (eq_attr "alternative" "4,5,6,7,8")
	(const_string "vex")
	(const_string "orig")))
    (set (attr "mode")
      (cond [(eq_attr "type" "imovx")
	       (const_string "SI")
	     (and (eq_attr "alternative" "1,2")
		  (match_operand:HI 1 "aligned_operand"))
	       (const_string "SI")
	     (and (eq_attr "alternative" "0")
		  (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
		       (not (match_test "TARGET_HIMODE_MATH"))))
	       (const_string "SI")
	    ]
	    (const_string "HI")))])

;; Situation is quite tricky about when to choose full sized (SImode) move
;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
;; partial register dependency machines (such as AMD Athlon), where QImode
;; moves issue extra dependency and for partial register stalls machines
;; that don't use QImode patterns (and QImode move cause stall on the next
;; instruction).
;;
;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there.  Then we use movzx.

(define_insn "*movqi_internal"
  [(set (match_operand:QI 0 "nonimmediate_operand"
			"=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k,k,k")
	(match_operand:QI 1 "general_operand"
			"Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
      return "movz{bl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_MSKMOV:
      switch (which_alternative)
        {
	case 9:
	  ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
	  break;
	case 11:
	  ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
	  break;
	case 12:
	case 13:
	  gcc_assert (TARGET_AVX512DQ);
	  /* FALLTHRU */
	case 10:
	  ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
	  break;
	default:
	  gcc_unreachable ();
	}

      suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";

      snprintf (buf, sizeof (buf), ops, suffix);
      output_asm_insn (buf, operands);
      return "";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
	{
	  if (get_attr_mode (insn) == MODE_HI)
	    return "kxorw\t%0, %0, %0";
	  else
	    return "kxorb\t%0, %0, %0";
	}
      else if (operands[1] == constm1_rtx)
	{
	  gcc_assert (TARGET_AVX512DQ);
	  return "kxnorb\t%0, %0, %0";
	}
      gcc_unreachable ();

    default:
      if (get_attr_mode (insn) == MODE_SI)
	return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else
	return "mov{b}\t{%1, %0|%0, %1}";
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "1,2")
	      (const_string "x64")
	    (eq_attr "alternative" "12,13,15")
	      (const_string "avx512dq")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "9,10,11,12,13")
	      (const_string "mskmov")
	    (eq_attr "alternative" "14,15")
	      (const_string "msklog")
	    (and (eq_attr "alternative" "7")
		 (not (match_operand:QI 1 "aligned_operand")))
	      (const_string "imovx")
	    (match_test "optimize_function_for_size_p (cfun)")
	      (const_string "imov")
	    (and (eq_attr "alternative" "5")
		 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
		      (not (match_test "TARGET_QIMODE_MATH"))))
	      (const_string "imov")
	    (eq_attr "alternative" "5,7")
	      (const_string "imovx")
	    (and (match_test "TARGET_MOVX")
		 (eq_attr "alternative" "4"))
	      (const_string "imovx")
	   ]
	   (const_string "imov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
       (const_string "vex")
       (const_string "orig")))
   (set (attr "mode")
      (cond [(eq_attr "alternative" "5,6,7")
	       (const_string "SI")
	     (eq_attr "alternative" "8")
	       (const_string "QI")
	     (and (eq_attr "alternative" "9,10,11,14")
		  (not (match_test "TARGET_AVX512DQ")))
	       (const_string "HI")
	     (eq_attr "type" "imovx")
	       (const_string "SI")
	     ;; For -Os, 8-bit immediates are always shorter than 32-bit
	     ;; ones.
	     (and (eq_attr "type" "imov")
		  (and (eq_attr "alternative" "3")
		       (match_test "optimize_function_for_size_p (cfun)")))
	       (const_string "QI")
	     ;; For -Os, movl where one or both operands are NON_Q_REGS
	     ;; and both are LEGACY_REGS is shorter than movb.
	     ;; Otherwise movb and movl sizes are the same, so decide purely
	     ;; based on speed factors.
	     (and (eq_attr "type" "imov")
		  (and (eq_attr "alternative" "1")
		       (match_test "optimize_function_for_size_p (cfun)")))
	       (const_string "SI")
	     (and (eq_attr "type" "imov")
		  (and (eq_attr "alternative" "0,1,2,3")
		       (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
			    (not (match_test "TARGET_PARTIAL_REG_STALL")))))
	       (const_string "SI")
	     ;; Avoid partial register stalls when not using QImode arithmetic
	     (and (eq_attr "type" "imov")
		  (and (eq_attr "alternative" "0,1,2,3")
		       (and (match_test "TARGET_PARTIAL_REG_STALL")
			    (not (match_test "TARGET_QIMODE_MATH")))))
	       (const_string "SI")
	   ]
	   (const_string "QI")))])

;; Stores and loads of ax to arbitrary constant address.
;; We fake an second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabs<mode>_1"
  [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
	(match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
  "TARGET_LP64 && ix86_check_movabs (insn, 0)"
{
  /* Recover the full memory rtx.  */
  operands[0] = SET_DEST (PATTERN (insn));
  switch (which_alternative)
    {
    case 0:
      return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
    case 1:
      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0,*")
   (set_attr "memory" "store")
   (set_attr "mode" "<MODE>")])

(define_insn "*movabs<mode>_2"
  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
        (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
  "TARGET_LP64 && ix86_check_movabs (insn, 1)"
{
  /* Recover the full memory rtx.  */
  operands[1] = SET_SRC (PATTERN (insn));
  switch (which_alternative)
    {
    case 0:
      return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
    case 1:
      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0")
   (set_attr "memory" "load")
   (set_attr "mode" "<MODE>")])

(define_insn "*swap<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "+r")
	(match_operand:SWI48 1 "register_operand" "+r"))
   (set (match_dup 1)
	(match_dup 0))]
  ""
  "xchg{<imodesuffix>}\t%1, %0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])

(define_insn "*swap<mode>"
  [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
	(match_operand:SWI12 1 "register_operand" "+<r>,r"))
   (set (match_dup 1)
	(match_dup 0))]
  ""
  "@
   xchg{<imodesuffix>}\t%1, %0
   xchg{l}\t%k1, %k0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>,SI")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "false")]
	   (symbol_ref "true")))
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])

(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
	(match_operand:SWI 1 "general_reg_operand"))
   (set (match_dup 1)
	(match_operand:SWI 2 "general_reg_operand"))
   (set (match_dup 2) (match_dup 0))]
  "peep2_reg_dead_p (3, operands[0])
   && optimize_insn_for_size_p ()"
  [(parallel [(set (match_dup 1) (match_dup 2))
	      (set (match_dup 2) (match_dup 1))])])

(define_expand "movstrict<mode>"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
	(match_operand:SWI12 1 "general_operand"))]
  ""
{
  gcc_assert (SUBREG_P (operands[0]));
  if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
      || GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT)
    FAIL;
})

(define_insn "*movstrict<mode>_1"
  [(set (strict_low_part
	  (match_operand:SWI12 0 "register_operand" "+<r>"))
	(match_operand:SWI12 1 "general_operand" "<r>mn"))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")])

(define_insn "*movstrict<mode>_xor"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(match_operand:SWI12 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "xor{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

(define_expand "extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand")
	(sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
			    (match_operand:SI 2 "const_int_operand")
			    (match_operand:SI 3 "const_int_operand")))]
  ""
{
  /* Handle extractions from %ah et al.  */
  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    operands[1] = copy_to_reg (operands[1]);
})

(define_insn "*extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand" "=R")
	(sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q")
			    (const_int 8)
			    (const_int 8)))]
  ""
  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])

(define_expand "extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand")
	(zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
			     (match_operand:SI 2 "const_int_operand")
			     (match_operand:SI 3 "const_int_operand")))]
  ""
{
  if (ix86_expand_pextr (operands))
    DONE;

  /* Handle extractions from %ah et al.  */
  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    operands[1] = copy_to_reg (operands[1]);
})

(define_insn "*extzvqi_mem_rex64"
  [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
	(subreg:QI
	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
			   (const_int 8)
			   (const_int 8)) 0))]
  "TARGET_64BIT && reload_completed"
  "mov{b}\t{%h1, %0|%0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_insn "*extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand" "=R")
	(zero_extract:SWI248 (match_operand 1 "ext_register_operand" "Q")
			     (const_int 8)
			     (const_int 8)))]
  ""
  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])

(define_insn "*extzvqi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
	(subreg:QI
	  (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q")
			   (const_int 8)
			   (const_int 8)) 0))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
    default:
      return "mov{b}\t{%h1, %0|%0, %h1}";
    }
}
  [(set_attr "isa" "*,*,nox64")
   (set (attr "type")
     (if_then_else (and (match_operand:QI 0 "register_operand")
			(ior (not (match_operand:QI 0 "QIreg_operand"))
			     (match_test "TARGET_MOVX")))
	(const_string "imovx")
	(const_string "imov")))
   (set (attr "mode")
     (if_then_else (eq_attr "type" "imovx")
	(const_string "SI")
	(const_string "QI")))])

(define_peephole2
  [(set (match_operand:QI 0 "register_operand")
	(subreg:QI
	  (zero_extract:SI (match_operand 1 "ext_register_operand")
			   (const_int 8)
			   (const_int 8)) 0))
   (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
  "TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2)
	(subreg:QI
	  (zero_extract:SI (match_dup 1)
			   (const_int 8)
			   (const_int 8)) 0))])

(define_expand "insv<mode>"
  [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
			     (match_operand:SI 1 "const_int_operand")
			     (match_operand:SI 2 "const_int_operand"))
        (match_operand:SWI248 3 "register_operand"))]
  ""
{
  rtx dst;

  if (ix86_expand_pinsr (operands))
    DONE;

  /* Handle insertions to %ah et al.  */
  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[0]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    dst = copy_to_reg (operands[0]);
  else
    dst = operands[0];

  emit_insn (gen_insv<mode>_1 (dst, operands[3]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

(define_insn "*insvqi_1_mem_rex64"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (match_operand:QI 1 "norex_memory_operand" "Bn") 0))]
  "TARGET_64BIT && reload_completed"
  "mov{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_insn "insv<mode>_1"
  [(set (zero_extract:SWI248 (match_operand 0 "ext_register_operand" "+Q,Q")
			     (const_int 8)
			     (const_int 8))
	(match_operand:SWI248 1 "general_operand" "QnBc,m"))]
  ""
{
  if (CONST_INT_P (operands[1]))
    operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
  return "mov{b}\t{%b1, %h0|%h0, %b1}";
}
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_insn "*insvqi_1"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (match_operand:QI 1 "general_operand" "QnBc,m") 0))]
  ""
  "mov{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_peephole2
  [(set (match_operand:QI 0 "register_operand")
	(match_operand:QI 1 "norex_memory_operand"))
   (set (zero_extract:SI (match_operand 2 "ext_register_operand")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI (match_dup 0) 0))]
  "TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])"
  [(set (zero_extract:SI (match_dup 2)
			 (const_int 8)
			 (const_int 8))
	   (subreg:SI (match_dup 1) 0))])

(define_code_iterator any_extract [sign_extract zero_extract])

(define_insn "*insvqi_2"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(any_extract:SI (match_operand 1 "ext_register_operand" "Q")
			(const_int 8)
			(const_int 8)))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_insn "*insvqi_3"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(any_shiftrt:SI (match_operand:SI 1 "register_operand" "Q")
			(const_int 8)))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

;; Floating point push instructions.

(define_insn "*pushtf"
  [(set (match_operand:TF 0 "push_operand" "=<,<")
	(match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
  "TARGET_64BIT || TARGET_SSE"
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "sse,*")
   (set_attr "mode" "TF,DI")])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:TF 0 "push_operand")
	(match_operand:TF 1 "sse_reg_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
   (set (match_dup 0) (match_dup 1))]
{
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_insn_and_split "*pushxf_rounded"
  [(set (mem:XF
	  (pre_modify:P
	    (reg:P SP_REG)
	    (plus:P (reg:P SP_REG) (const_int -16))))
	(match_operand:XF 0 "nonmemory_no_elim_operand" "f,r,*r,C"))]
  "TARGET_64BIT"
  "#"
  "&& 1"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
   (set (match_dup 1) (match_dup 0))]
{
  rtx pat = PATTERN (curr_insn);
  operands[1] = SET_DEST (pat);

  /* Preserve memory attributes. */
  operands[1] = replace_equiv_address (operands[1], stack_pointer_rtx);
}
  [(set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "1,2,3")
		 (const_string "DI")
	      ]
	      (const_string "XF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "false")]
           (symbol_ref "true")))])

(define_insn "*pushxf"
  [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
	(match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,*,*,nox64,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "1,2,3,4")
		 (if_then_else (match_test "TARGET_64BIT")
		   (const_string "DI")
		   (const_string "SI"))
	      ]
	      (const_string "XF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "false")]
           (symbol_ref "true")))])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:XF 0 "push_operand")
	(match_operand:XF 1 "fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_insn "*pushdf"
  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
	(match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,x"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*,sse")
   (set_attr "mode" "DF,SI,SI,SI,DI,DF")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "false")]
           (symbol_ref "true")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
           (symbol_ref "true")))])
   
;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:DF 0 "push_operand")
	(match_operand:DF 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
   (set (match_dup 0) (match_dup 1))]
{
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_insn "*pushsf_rex64"
  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
  "TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  if (which_alternative != 1)
    return "#";
  return "push{q}\t%q1";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,DI,SF")])

(define_insn "*pushsf"
  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
	(match_operand:SF 1 "general_no_elim_operand" "f,rmF,x"))]
  "!TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  if (which_alternative != 1)
    return "#";
  return "push{l}\t%1";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,SI,SF")])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:SF 0 "push_operand")
	(match_operand:SF 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  rtx op = XEXP (operands[0], 0);
  if (GET_CODE (op) == PRE_DEC)
    {
      gcc_assert (!TARGET_64BIT);
      op = GEN_INT (-4);
    }
  else
    {
      op = XEXP (XEXP (op, 1), 1);
      gcc_assert (CONST_INT_P (op));
    }
  operands[2] = op;
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_split
  [(set (match_operand:SF 0 "push_operand")
	(match_operand:SF 1 "memory_operand"))]
  "reload_completed
   && find_constant_src (insn)"
  [(set (match_dup 0) (match_dup 2))]
  "operands[2] = find_constant_src (curr_insn);")

(define_split
  [(set (match_operand 0 "push_operand")
	(match_operand 1 "general_gr_operand"))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode)"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; Floating point move instructions.

(define_expand "movtf"
  [(set (match_operand:TF 0 "nonimmediate_operand")
	(match_operand:TF 1 "nonimmediate_operand"))]
  "TARGET_64BIT || TARGET_SSE"
  "ix86_expand_move (TFmode, operands); DONE;")

(define_expand "mov<mode>"
  [(set (match_operand:X87MODEF 0 "nonimmediate_operand")
	(match_operand:X87MODEF 1 "general_operand"))]
  ""
  "ix86_expand_move (<MODE>mode, operands); DONE;")

(define_insn "*movtf_internal"
  [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
	(match_operand:TF 1 "general_operand"	   "C ,vm,v,*roF,*rC"))]
  "(TARGET_64BIT || TARGET_SSE)
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
	    || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
	   && standard_sse_constant_p (operands[1], TFmode) == 1
	   && !memory_operand (operands[0], TFmode))
       || (!TARGET_MEMORY_MISMATCH_STALL
	   && memory_operand (operands[0], TFmode)))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      /* Handle misaligned load/store since we
         don't have movmisaligntf pattern. */
      if (misaligned_operand (operands[0], TFmode)
	  || misaligned_operand (operands[1], TFmode))
	{
	  if (get_attr_mode (insn) == MODE_V4SF)
	    return "%vmovups\t{%1, %0|%0, %1}";
	  else if (TARGET_AVX512VL
		   && (EXT_REX_SSE_REG_P (operands[0])
		       || EXT_REX_SSE_REG_P (operands[1])))
	    return "vmovdqu64\t{%1, %0|%0, %1}";
	  else
	    return "%vmovdqu\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (get_attr_mode (insn) == MODE_V4SF)
	    return "%vmovaps\t{%1, %0|%0, %1}";
	  else if (TARGET_AVX512VL
		   && (EXT_REX_SSE_REG_P (operands[0])
		       || EXT_REX_SSE_REG_P (operands[1])))
	    return "vmovdqa64\t{%1, %0|%0, %1}";
	  else
	    return "%vmovdqa\t{%1, %0|%0, %1}";
	}

    case TYPE_MULTI:
	return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,*,*,x64,x64")
   (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4")
		 (const_string "DI")
	       (match_test "TARGET_AVX")
		 (const_string "TI")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	       (and (eq_attr "alternative" "2")
		    (match_test "TARGET_SSE_TYPELESS_STORES"))
		 (const_string "V4SF")
	       ]
	       (const_string "TI")))])

(define_split
  [(set (match_operand:TF 0 "nonimmediate_gr_operand")
        (match_operand:TF 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; Possible store forwarding (partial memory) stall
;; in alternatives 4, 6, 7 and 8.
(define_insn "*movxf_internal"
  [(set (match_operand:XF 0 "nonimmediate_operand"
	 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r  ,o ,o")
	(match_operand:XF 1 "general_operand"
	 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
	    || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
	   && standard_80387_constant_p (operands[1]) > 0
	   && !memory_operand (operands[0], XFmode))
       || (!TARGET_MEMORY_MISMATCH_STALL
	   && memory_operand (operands[0], XFmode))
       || !TARGET_HARD_XF_REGS)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_FMOV:
      if (which_alternative == 2)
        return standard_80387_constant_opcode (operands[1]);
      return output_387_reg_move (insn, operands);

    case TYPE_MULTI:
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
	(cond [(eq_attr "alternative" "7,10")
		 (const_string "nox64")
	       (eq_attr "alternative" "8,11")
		 (const_string "x64")
	      ]
	      (const_string "*")))
   (set (attr "type")
	(cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
		 (const_string "multi")
	      ]
	      (const_string "fmov")))
   (set (attr "mode")
	(cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
		 (if_then_else (match_test "TARGET_64BIT")
		   (const_string "DI")
		   (const_string "SI"))
	      ]
	      (const_string "XF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "false")]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "9,10,11")
              (if_then_else
		(match_test "TARGET_HARD_XF_REGS")
		(symbol_ref "false")
		(const_string "*"))
            (not (match_test "TARGET_HARD_XF_REGS"))
	      (symbol_ref "false")
	   ]
	   (const_string "*")))])
   
(define_split
  [(set (match_operand:XF 0 "nonimmediate_gr_operand")
        (match_operand:XF 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
(define_insn "*movdf_internal"
  [(set (match_operand:DF 0 "nonimmediate_operand"
    "=Yf*f,m   ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r  ,o ,r  ,m")
	(match_operand:DF 1 "general_operand"
    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
	    || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
	   && ((IS_STACK_MODE (DFmode)
		&& standard_80387_constant_p (operands[1]) > 0)
	       || (TARGET_SSE2 && TARGET_SSE_MATH
		   && standard_sse_constant_p (operands[1], DFmode) == 1))
	   && !memory_operand (operands[0], DFmode))
       || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
	   && memory_operand (operands[0], DFmode))
       || !TARGET_HARD_DF_REGS)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_FMOV:
      if (which_alternative == 2)
        return standard_80387_constant_opcode (operands[1]);
      return output_387_reg_move (insn, operands);

    case TYPE_MULTI:
      return "#";

    case TYPE_IMOV:
      if (get_attr_mode (insn) == MODE_SI)
	return "mov{l}\t{%1, %k0|%k0, %1}";
      else if (which_alternative == 11)
	return "movabs{q}\t{%1, %0|%0, %1}";
      else
	return "mov{q}\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_DF:
	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	    return "vmovsd\t{%d1, %0|%0, %d1}";
	  return "%vmovsd\t{%1, %0|%0, %1}";

	case MODE_V4SF:
	  return "%vmovaps\t{%1, %0|%0, %1}";
	case MODE_V8DF:
	  return "vmovapd\t{%g1, %g0|%g0, %g1}";
	case MODE_V2DF:
	  return "%vmovapd\t{%1, %0|%0, %1}";

	case MODE_V2SF:
	  gcc_assert (!TARGET_AVX);
	  return "movlps\t{%1, %0|%0, %1}";
	case MODE_V1DF:
	  gcc_assert (!TARGET_AVX);
	  return "movlpd\t{%1, %0|%0, %1}";

	case MODE_DI:
	  /* Handle broken assemblers that require movd instead of movq.  */
	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
	    return "%vmovd\t{%1, %0|%0, %1}";
	  return "%vmovq\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
	(cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
		 (const_string "nox64")
	       (eq_attr "alternative" "8,9,10,11,24,25")
		 (const_string "x64")
	       (eq_attr "alternative" "12,13,14,15")
		 (const_string "sse2")
	       (eq_attr "alternative" "20,21")
		 (const_string "x64_sse2")
	      ]
	      (const_string "*")))
   (set (attr "type")
	(cond [(eq_attr "alternative" "0,1,2")
		 (const_string "fmov")
	       (eq_attr "alternative" "3,4,5,6,7,22,23")
		 (const_string "multi")
	       (eq_attr "alternative" "8,9,10,11,24,25")
		 (const_string "imov")
	       (eq_attr "alternative" "12,16")
		 (const_string "sselog1")
	      ]
	      (const_string "ssemov")))
   (set (attr "modrm")
     (if_then_else (eq_attr "alternative" "11")
       (const_string "0")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "11")
       (const_string "8")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else
       (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
	    (eq_attr "mode" "V1DF"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
	(cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
		 (const_string "SI")
	       (eq_attr "alternative" "8,9,11,20,21,24,25")
		 (const_string "DI")

	       /* xorps is one byte shorter for non-AVX targets.  */
	       (eq_attr "alternative" "12,16")
		 (cond [(and (match_test "TARGET_AVX512F")
			     (not (match_test "TARGET_PREFER_AVX256")))
			  (const_string "XI")
			(match_test "TARGET_AVX")
			  (const_string "V2DF")
			(ior (not (match_test "TARGET_SSE2"))
			     (match_test "optimize_function_for_size_p (cfun)"))
			  (const_string "V4SF")
			(match_test "TARGET_SSE_LOAD0_BY_PXOR")
			  (const_string "TI")
		       ]
		       (const_string "V2DF"))

	       /* For architectures resolving dependencies on
		  whole SSE registers use movapd to break dependency
		  chains, otherwise use short move to avoid extra work.  */

	       /* movaps is one byte shorter for non-AVX targets.  */
	       (eq_attr "alternative" "13,17")
		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
				  (not (match_test "TARGET_AVX512VL")))
			     (ior (match_operand 0 "ext_sse_reg_operand")
				  (match_operand 1 "ext_sse_reg_operand")))
			  (const_string "V8DF")
			(match_test "TARGET_AVX")
			  (const_string "DF")
			(ior (not (match_test "TARGET_SSE2"))
			     (match_test "optimize_function_for_size_p (cfun)"))
			  (const_string "V4SF")
			(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
			  (const_string "V4SF")
			(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
			  (const_string "V2DF")
		       ]
		       (const_string "DF"))

	       /* For architectures resolving dependencies on register
		  parts we may avoid extra work to zero out upper part
		  of register.  */
	       (eq_attr "alternative" "14,18")
		 (cond [(not (match_test "TARGET_SSE2"))
			  (const_string "V2SF")
			(match_test "TARGET_AVX")
			  (const_string "DF")
			(match_test "TARGET_SSE_SPLIT_REGS")
			  (const_string "V1DF")
		       ]
		       (const_string "DF"))

	       (and (eq_attr "alternative" "15,19")
		    (not (match_test "TARGET_SSE2")))
		 (const_string "V2SF")
	      ]
	      (const_string "DF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "false")]
           (symbol_ref "true")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
	    (eq_attr "alternative" "20")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "21")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "22,23,24,25")
              (if_then_else
		(match_test "TARGET_HARD_DF_REGS")
		(symbol_ref "false")
		(const_string "*"))
            (not (match_test "TARGET_HARD_DF_REGS"))
	      (symbol_ref "false")
	   ]
	   (const_string "*")))])

(define_split
  [(set (match_operand:DF 0 "nonimmediate_gr_operand")
        (match_operand:DF 1 "general_gr_operand"))]
  "!TARGET_64BIT && reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

(define_insn "*movsf_internal"
  [(set (match_operand:SF 0 "nonimmediate_operand"
	  "=Yf*f,m   ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r  ,m")
	(match_operand:SF 1 "general_operand"
	  "Yf*fm,Yf*f,G   ,rmF,rF,C,v,m,v,v ,r ,*y ,m  ,*y,*y,r  ,rmF,rF"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
	    || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
	   && ((IS_STACK_MODE (SFmode)
		&& standard_80387_constant_p (operands[1]) > 0)
	       || (TARGET_SSE && TARGET_SSE_MATH
		   && standard_sse_constant_p (operands[1], SFmode) == 1)))
       || memory_operand (operands[0], SFmode)
       || !TARGET_HARD_SF_REGS)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_FMOV:
      if (which_alternative == 2)
        return standard_80387_constant_opcode (operands[1]);
      return output_387_reg_move (insn, operands);

    case TYPE_IMOV:
      return "mov{l}\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_SF:
	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	    return "vmovss\t{%d1, %0|%0, %d1}";
	  return "%vmovss\t{%1, %0|%0, %1}";

	case MODE_V16SF:
	  return "vmovaps\t{%g1, %g0|%g0, %g1}";
	case MODE_V4SF:
	  return "%vmovaps\t{%1, %0|%0, %1}";

	case MODE_SI:
	  return "%vmovd\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    case TYPE_MMXMOV:
      switch (get_attr_mode (insn))
	{
	case MODE_DI:
	  return "movq\t{%1, %0|%0, %1}";
	case MODE_SI:
	  return "movd\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "9,10")
	      (const_string "sse2")
	   ]
	   (const_string "*")))
   (set (attr "type")
	(cond [(eq_attr "alternative" "0,1,2")
		 (const_string "fmov")
	       (eq_attr "alternative" "3,4,16,17")
		 (const_string "imov")
	       (eq_attr "alternative" "5")
		 (const_string "sselog1")
	       (eq_attr "alternative" "11,12,13,14,15")
		 (const_string "mmxmov")
	      ]
	      (const_string "ssemov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
		 (const_string "SI")
	       (eq_attr "alternative" "11")
		 (const_string "DI")
	       (eq_attr "alternative" "5")
		 (cond [(and (match_test "TARGET_AVX512F")
			     (not (match_test "TARGET_PREFER_AVX256")))
			  (const_string "V16SF")
			(match_test "TARGET_AVX")
			  (const_string "V4SF")
			(ior (not (match_test "TARGET_SSE2"))
			     (match_test "optimize_function_for_size_p (cfun)"))
			  (const_string "V4SF")
			(match_test "TARGET_SSE_LOAD0_BY_PXOR")
			  (const_string "TI")
		       ]
		       (const_string "V4SF"))

	       /* For architectures resolving dependencies on
		  whole SSE registers use APS move to break dependency
		  chains, otherwise use short move to avoid extra work.

		  Do the same for architectures resolving dependencies on
		  the parts.  While in DF mode it is better to always handle
		  just register parts, the SF mode is different due to lack
		  of instructions to load just part of the register.  It is
		  better to maintain the whole registers in single format
		  to avoid problems on using packed logical operations.  */
	       (eq_attr "alternative" "6")
		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
				  (not (match_test "TARGET_AVX512VL")))
			     (ior (match_operand 0 "ext_sse_reg_operand")
				  (match_operand 1 "ext_sse_reg_operand")))
			  (const_string "V16SF")
			(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
			     (match_test "TARGET_SSE_SPLIT_REGS"))
			  (const_string "V4SF")
		       ]
		       (const_string "SF"))
	      ]
	      (const_string "SF")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "9,14")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "10,15")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "16,17")
              (if_then_else
		(match_test "TARGET_HARD_SF_REGS")
		(symbol_ref "false")
		(const_string "*"))
            (not (match_test "TARGET_HARD_SF_REGS"))
	      (symbol_ref "false")
	   ]
	   (const_string "*")))])

(define_split
  [(set (match_operand 0 "any_fp_register_operand")
	(match_operand 1 "memory_operand"))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode
       || GET_MODE (operands[0]) == SFmode)
   && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
  [(set (match_dup 0) (match_dup 2))]
  "operands[2] = find_constant_src (curr_insn);")

(define_split
  [(set (match_operand 0 "any_fp_register_operand")
	(float_extend (match_operand 1 "memory_operand")))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode)
   && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
  [(set (match_dup 0) (match_dup 2))]
  "operands[2] = find_constant_src (curr_insn);")

;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
(define_split
  [(set (match_operand:X87MODEF 0 "fp_register_operand")
	(match_operand:X87MODEF 1 "immediate_operand"))]
  "reload_completed
   && (standard_80387_constant_p (operands[1]) == 8
       || standard_80387_constant_p (operands[1]) == 9)"
  [(set (match_dup 0)(match_dup 1))
   (set (match_dup 0)
	(neg:X87MODEF (match_dup 0)))]
{
  if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
    operands[1] = CONST0_RTX (<MODE>mode);
  else
    operands[1] = CONST1_RTX (<MODE>mode);
})

(define_insn "*swapxf"
  [(set (match_operand:XF 0 "register_operand" "+f")
	(match_operand:XF 1 "register_operand" "+f"))
   (set (match_dup 1)
	(match_dup 0))]
  "TARGET_80387"
{
  if (STACK_TOP_P (operands[0]))
    return "fxch\t%1";
  else
    return "fxch\t%0";
}
  [(set_attr "type" "fxch")
   (set_attr "mode" "XF")])


;; Zero extension instructions

(define_expand "zero_extendsidi2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])

(define_insn "*zero_extendsidi2"
  [(set (match_operand:DI 0 "nonimmediate_operand"
		"=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
	(zero_extend:DI
	 (match_operand:SI 1 "x86_64_zext_operand"
	        "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k,*km")))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      if (ix86_use_lea_for_mov (insn, operands))
	return "lea{l}\t{%E1, %k0|%k0, %E1}";
      else
	return "mov{l}\t{%1, %k0|%k0, %1}";

    case TYPE_MULTI:
      return "#";

    case TYPE_MMXMOV:
      return "movd\t{%1, %0|%0, %1}";

    case TYPE_SSEMOV:
      if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
	{
	  if (EXT_REX_SSE_REG_P (operands[0])
	      || EXT_REX_SSE_REG_P (operands[1]))
	    return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
	  else
	    return "%vpmovzxdq\t{%1, %0|%0, %1}";
	}

      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";

      return "%vmovd\t{%1, %0|%0, %1}";

    case TYPE_MSKMOV:
      return "kmovd\t{%1, %k0|%k0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,2")
	      (const_string "nox64")
	    (eq_attr "alternative" "3")
	      (const_string "x64")
	    (eq_attr "alternative" "7,8,9")
	      (const_string "sse2")
	    (eq_attr "alternative" "10")
	      (const_string "sse4")
	    (eq_attr "alternative" "11")
	      (const_string "avx512f")
	    (eq_attr "alternative" "12")
	      (const_string "x64_avx512bw")
	    (eq_attr "alternative" "13")
	      (const_string "avx512bw")
	   ]
	   (const_string "*")))
   (set (attr "mmx_isa")
     (if_then_else (eq_attr "alternative" "5,6")
		   (const_string "native")
		   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,2,4")
	      (const_string "multi")
	    (eq_attr "alternative" "5,6")
	      (const_string "mmxmov")
	    (eq_attr "alternative" "7")
	      (if_then_else (match_test "TARGET_64BIT")
		(const_string "ssemov")
		(const_string "multi"))
	    (eq_attr "alternative" "8,9,10,11")
	      (const_string "ssemov")
	    (eq_attr "alternative" "12,13")
	      (const_string "mskmov")
	   ]
	   (const_string "imovx")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "10,11")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_0f")
     (if_then_else (eq_attr "type" "imovx")
       (const_string "0")
       (const_string "*")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "5,6")
	      (const_string "DI")
	    (and (eq_attr "alternative" "7")
		 (match_test "TARGET_64BIT"))
	      (const_string "TI")
	    (eq_attr "alternative" "8,10,11")
	      (const_string "TI")
	   ]
	   (const_string "SI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "7")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	    (eq_attr "alternative" "5,8")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
           (symbol_ref "true")))])

(define_split
  [(set (match_operand:DI 0 "memory_operand")
     	(zero_extend:DI (match_operand:SI 1 "memory_operand")))]
  "reload_completed"
  [(set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

(define_split
  [(set (match_operand:DI 0 "general_reg_operand")
	(zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
  "!TARGET_64BIT && reload_completed
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

(define_split
  [(set (match_operand:DI 0 "nonimmediate_gr_operand")
	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
  "!TARGET_64BIT && reload_completed
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  [(set (match_dup 3) (match_dup 1))
   (set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

(define_mode_attr kmov_isa
  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])

(define_insn "zero_extend<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
	(zero_extend:DI
	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
  "TARGET_64BIT"
  "@
   movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
   (set_attr "type" "imovx,mskmov,mskmov")
   (set_attr "mode" "SI,<MODE>,<MODE>")])

(define_expand "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand")
	(zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);
      emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
      DONE;
    }
})

(define_insn_and_split "zero_extend<mode>si2_and"
  [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
	(zero_extend:SI
	  (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!REG_P (operands[1])
      || REGNO (operands[0]) != REGNO (operands[1]))
    {
      ix86_expand_clear (operands[0]);

      gcc_assert (!TARGET_PARTIAL_REG_STALL);
      emit_insn (gen_rtx_SET
      		 (gen_rtx_STRICT_LOW_PART
		  (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
		  operands[1]));
      DONE;
    }

  operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
}
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")])

(define_insn "*zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
	(zero_extend:SI
	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
  "@
   movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
   kmov<mskmodesuffix>\t{%1, %0|%0, %1}
   kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
   (set_attr "type" "imovx,mskmov,mskmov")
   (set_attr "mode" "SI,<MODE>,<MODE>")])

(define_expand "zero_extendqihi2"
  [(set (match_operand:HI 0 "register_operand")
	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
    {
      operands[1] = force_reg (QImode, operands[1]);
      emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
      DONE;
    }
})

(define_insn_and_split "zero_extendqihi2_and"
  [(set (match_operand:HI 0 "register_operand" "=r,?&q")
	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!REG_P (operands[1])
      || REGNO (operands[0]) != REGNO (operands[1]))
    {
      ix86_expand_clear (operands[0]);

      gcc_assert (!TARGET_PARTIAL_REG_STALL);
      emit_insn (gen_rtx_SET
		 (gen_rtx_STRICT_LOW_PART
		  (VOIDmode, gen_lowpart (QImode, operands[0])),
		  operands[1]));
      DONE;
    }

  operands[0] = gen_lowpart (SImode, operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")])

; zero extend to SImode to avoid partial register stalls
(define_insn "*zero_extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
  "@
   movz{bl|x}\t{%1, %k0|%k0, %1}
   kmovb\t{%1, %k0|%k0, %1}
   kmovb\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,avx512dq,avx512dq")
   (set_attr "type" "imovx,mskmov,mskmov")
   (set_attr "mode" "SI,QI,QI")])

;; Sign extension instructions

(define_expand "extendsidi2"
  [(set (match_operand:DI 0 "register_operand")
	(sign_extend:DI (match_operand:SI 1 "register_operand")))]
  ""
{
  if (!TARGET_64BIT)
    {
      emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
      DONE;
    }
})

(define_insn "*extendsidi2_rex64"
  [(set (match_operand:DI 0 "register_operand" "=*a,r")
	(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
  "TARGET_64BIT"
  "@
   {cltq|cdqe}
   movs{lq|x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "DI")
   (set_attr "prefix_0f" "0")
   (set_attr "modrm" "0,1")])

(define_insn "extendsidi2_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
	(sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
  "!TARGET_64BIT"
  "#")

;; Split the memory case.  If the source register doesn't die, it will stay
;; this way, if it does die, following peephole2s take care of it.
(define_split
  [(set (match_operand:DI 0 "memory_operand")
	(sign_extend:DI (match_operand:SI 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (match_operand:SI 2 "register_operand"))]
  "reload_completed"
  [(const_int 0)]
{
  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);

  emit_move_insn (operands[3], operands[1]);

  /* Generate a cltd if possible and doing so it profitable.  */
  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
      && REGNO (operands[1]) == AX_REG
      && REGNO (operands[2]) == DX_REG)
    {
      emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
    }
  else
    {
      emit_move_insn (operands[2], operands[1]);
      emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
    }
  emit_move_insn (operands[4], operands[2]);
  DONE;
})

;; Peepholes for the case where the source register does die, after
;; being split with the above splitter.
(define_peephole2
  [(set (match_operand:SI 0 "memory_operand")
	(match_operand:SI 1 "general_reg_operand"))
   (set (match_operand:SI 2 "general_reg_operand") (match_dup 1))
   (parallel [(set (match_dup 2)
		   (ashiftrt:SI (match_dup 2) (const_int 31)))
	       (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
  "REGNO (operands[1]) != REGNO (operands[2])
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (4, operands[2])
   && !reg_mentioned_p (operands[2], operands[3])"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 3) (match_dup 1))])

(define_peephole2
  [(set (match_operand:SI 0 "memory_operand")
	(match_operand:SI 1 "general_reg_operand"))
   (parallel [(set (match_operand:SI 2 "general_reg_operand")
		   (ashiftrt:SI (match_dup 1) (const_int 31)))
	       (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
  "/* cltd is shorter than sarl $31, %eax */
   !optimize_function_for_size_p (cfun)
   && REGNO (operands[1]) == AX_REG
   && REGNO (operands[2]) == DX_REG
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (3, operands[2])
   && !reg_mentioned_p (operands[2], operands[3])"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 3) (match_dup 1))])

;; Extend to register case.  Optimize case where source and destination
;; registers match and cases where we can use cltd.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(sign_extend:DI (match_operand:SI 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (match_scratch:SI 2))]
  "reload_completed"
  [(const_int 0)]
{
  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);

  if (REGNO (operands[3]) != REGNO (operands[1]))
    emit_move_insn (operands[3], operands[1]);

  /* Generate a cltd if possible and doing so it profitable.  */
  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
      && REGNO (operands[3]) == AX_REG
      && REGNO (operands[4]) == DX_REG)
    {
      emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
      DONE;
    }

  if (REGNO (operands[4]) != REGNO (operands[1]))
    emit_move_insn (operands[4], operands[1]);

  emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
  DONE;
})

(define_insn "extend<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(sign_extend:DI
	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
  "TARGET_64BIT"
  "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "DI")])

(define_insn "extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "=*a,r")
	(sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
  ""
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cwtl|cwde}";
    default:
      return "movs{wl|x}\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
			(eq_attr "alternative" "0"))
	(const_string "0")
	(const_string "1")))
   (set (attr "znver1_decode")
     (if_then_else (eq_attr "prefix_0f" "0")
	(const_string "double")
	(const_string "direct")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
	(const_string "0")
	(const_string "1")))])

(define_insn "*extendhisi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=*a,r")
	(zero_extend:DI
	 (sign_extend:SI
	  (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
  "TARGET_64BIT"
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cwtl|cwde}";
    default:
      return "movs{wl|x}\t{%1, %k0|%k0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
			(eq_attr "alternative" "0"))
	(const_string "0")
	(const_string "1")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
	(const_string "0")
	(const_string "1")))])

(define_insn "extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
  ""
  "movs{bl|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])

(define_insn "*extendqisi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
  "TARGET_64BIT"
  "movs{bl|x}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])

(define_insn "extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "=*a,r")
	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
  ""
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cbtw|cbw}";
    default:
      return "movs{bw|x}\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "HI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
			(eq_attr "alternative" "0"))
	(const_string "0")
	(const_string "1")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
	(const_string "0")
	(const_string "1")))])

;; Conversions between float and double.

;; These are all no-ops in the model used for the 80387.
;; So just emit moves.

;; %%% Kill these when call knows how to work out a DFmode push earlier.
(define_split
  [(set (match_operand:DF 0 "push_operand")
	(float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])

(define_split
  [(set (match_operand:XF 0 "push_operand")
	(float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")

(define_expand "extendsfdf2"
  [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
        (float_extend:DF (match_operand:SF 1 "general_operand")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  /* ??? Needed for compress_float_constant since all fp constants
     are TARGET_LEGITIMATE_CONSTANT_P.  */
  if (CONST_DOUBLE_P (operands[1]))
    {
      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
	  && standard_80387_constant_p (operands[1]) > 0)
	{
	  operands[1] = simplify_const_unary_operation
	    (FLOAT_EXTEND, DFmode, operands[1], SFmode);
	  emit_move_insn_1 (operands[0], operands[1]);
	  DONE;
	}
      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
    }
})

(define_insn "*extendsfdf2"
  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
        (float_extend:DF
	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      return output_387_reg_move (insn, operands);

    case 2:
      return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
    case 3:
      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
   (set_attr "avx_partial_xmm_update" "false,false,false,true")
   (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
   (set_attr "mode" "SF,XF,DF,DF")
   (set (attr "enabled")
     (if_then_else
       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "0,1")
	 (symbol_ref "TARGET_MIX_SSE_I387")
	 (symbol_ref "true"))
       (if_then_else
	 (eq_attr "alternative" "0,1")
	 (symbol_ref "true")
	 (symbol_ref "false"))))])

/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
   cvtss2sd:
      unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
      cvtps2pd xmm2,xmm1
   We do the conversion post reload to avoid producing of 128bit spills
   that might lead to ICE on 32bit target.  The sequence unlikely combine
   anyway.  */
(define_split
  [(set (match_operand:DF 0 "sse_reg_operand")
        (float_extend:DF
	  (match_operand:SF 1 "nonimmediate_operand")))]
  "TARGET_USE_VECTOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()
   && reload_completed
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
   [(set (match_dup 2)
	 (float_extend:V2DF
	   (vec_select:V2SF
	     (match_dup 3)
	     (parallel [(const_int 0) (const_int 1)]))))]
{
  operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
  operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
  /* Use movss for loading from memory, unpcklps reg, reg for registers.
     Try to avoid move when unpacking can be done in source.  */
  if (REG_P (operands[1]))
    {
      /* If it is unsafe to overwrite upper half of source, we need
	 to move to destination and unpack there.  */
      if (REGNO (operands[0]) != REGNO (operands[1])
	  || (EXT_REX_SSE_REG_P (operands[1])
	      && !TARGET_AVX512VL))
	{
	  rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
	  emit_move_insn (tmp, operands[1]);
	}
      else
	operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
	 =v, v, then vbroadcastss will be only needed for AVX512F without
	 AVX512VL.  */
      if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
	emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
					       operands[3]));
      else
	{
	  rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
	  emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
	}
    }
  else
    emit_insn (gen_vec_setv4sf_0 (operands[3],
				  CONST0_RTX (V4SFmode), operands[1]));
})

;; It's more profitable to split and then extend in the same register.
(define_peephole2
  [(set (match_operand:DF 0 "sse_reg_operand")
	(float_extend:DF
	  (match_operand:SF 1 "memory_operand")))]
  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (float_extend:DF (match_dup 2)))]
  "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore

(define_split
  [(set (match_operand:DF 0 "sse_reg_operand")
        (float_extend:DF
          (match_operand:SF 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
        (vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float_extend:DF
	      (match_dup 1)))
	  (match_dup 0)
          (const_int 1)))]
{
  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})

(define_expand "extend<mode>xf2"
  [(set (match_operand:XF 0 "nonimmediate_operand")
        (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
  "TARGET_80387"
{
  /* ??? Needed for compress_float_constant since all fp constants
     are TARGET_LEGITIMATE_CONSTANT_P.  */
  if (CONST_DOUBLE_P (operands[1]))
    {
      if (standard_80387_constant_p (operands[1]) > 0)
	{
	  operands[1] = simplify_const_unary_operation
	    (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
	  emit_move_insn_1 (operands[0], operands[1]);
	  DONE;
	}
      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
    }
})

(define_insn "*extend<mode>xf2_i387"
  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
        (float_extend:XF
	  (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
  "TARGET_80387"
  "* return output_387_reg_move (insn, operands);"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>,XF")])

;; %%% This seems like bad news.
;; This cannot output into an f-reg because there is no way to be sure
;; of truncating in that case.  Otherwise this is just like a simple move
;; insn.  So we pretend we can output to a reg in order to get better
;; register preferencing, but we really use a stack slot.

;; Conversion from DFmode to SFmode.

(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
	(float_truncate:SF
	  (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      return output_387_reg_move (insn, operands);

    case 2:
      return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
    case 3:
      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
   (set_attr "avx_partial_xmm_update" "false,false,false,true")
   (set_attr "mode" "SF")
   (set (attr "enabled")
     (if_then_else
       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
       (cond [(eq_attr "alternative" "0")
		(symbol_ref "TARGET_MIX_SSE_I387")
	      (eq_attr "alternative" "1")
		(symbol_ref "TARGET_MIX_SSE_I387
			     && flag_unsafe_math_optimizations")
	   ]
	   (symbol_ref "true"))
       (cond [(eq_attr "alternative" "0")
		(symbol_ref "true")
	      (eq_attr "alternative" "1")
		(symbol_ref "flag_unsafe_math_optimizations")
	   ]
	   (symbol_ref "false"))))])

/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
   cvtsd2ss:
      unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
      cvtpd2ps xmm2,xmm1
   We do the conversion post reload to avoid producing of 128bit spills
   that might lead to ICE on 32bit target.  The sequence unlikely combine
   anyway.  */
(define_split
  [(set (match_operand:SF 0 "sse_reg_operand")
	(float_truncate:SF
	  (match_operand:DF 1 "nonimmediate_operand")))]
  "TARGET_USE_VECTOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()
   && reload_completed
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
   [(set (match_dup 2)
	 (vec_concat:V4SF
	   (float_truncate:V2SF
	     (match_dup 4))
	   (match_dup 3)))]
{
  operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
  operands[3] = CONST0_RTX (V2SFmode);
  operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
  /* Use movsd for loading from memory, unpcklpd for registers.
     Try to avoid move when unpacking can be done in source, or SSE3
     movddup is available.  */
  if (REG_P (operands[1]))
    {
      if (!TARGET_SSE3
	  && REGNO (operands[0]) != REGNO (operands[1]))
	{
	  rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
	  emit_move_insn (tmp, operands[1]);
	  operands[1] = tmp;
	}
      else if (!TARGET_SSE3)
	operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
    }
  else
    emit_insn (gen_vec_concatv2df (operands[4], operands[1],
				   CONST0_RTX (DFmode)));
})

;; It's more profitable to split and then truncate in the same register.
(define_peephole2
  [(set (match_operand:SF 0 "sse_reg_operand")
	(float_truncate:SF
	  (match_operand:DF 1 "memory_operand")))]
  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
  "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore

(define_split
  [(set (match_operand:SF 0 "sse_reg_operand")
        (float_truncate:SF
	  (match_operand:DF 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float_truncate:SF
	      (match_dup 1)))
	  (match_dup 0)
	  (const_int 1)))]
{
  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})

;; Conversion from XFmode to {SF,DF}mode

(define_insn "truncxf<mode>2"
  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
	(float_truncate:MODEF
	  (match_operand:XF 1 "register_operand" "f,f")))]
  "TARGET_80387"
  "* return output_387_reg_move (insn, operands);"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>")
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "flag_unsafe_math_optimizations")
	   ]
	   (symbol_ref "true")))])

;; Signed conversion to DImode.

(define_expand "fix_truncxfdi2"
  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
                   (fix:DI (match_operand:XF 1 "register_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

(define_expand "fix_trunc<mode>di2"
  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
                   (fix:DI (match_operand:MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
{
  if (TARGET_FISTTP
      && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
   {
     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
   {
     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
     emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
     if (out != operands[0])
	emit_move_insn (operands[0], out);
     DONE;
   }
})

;; Signed conversion to SImode.

(define_expand "fix_truncxfsi2"
  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
                   (fix:SI (match_operand:XF 1 "register_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

(define_expand "fix_trunc<mode>si2"
  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
	           (fix:SI (match_operand:MODEF 1 "register_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
{
  if (TARGET_FISTTP
      && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
   {
     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
  if (SSE_FLOAT_MODE_P (<MODE>mode))
   {
     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
     emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
     if (out != operands[0])
	emit_move_insn (operands[0], out);
     DONE;
   }
})

;; Signed conversion to HImode.

(define_expand "fix_trunc<mode>hi2"
  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
	           (fix:HI (match_operand:X87MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387
   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

;; Unsigned conversion to DImode

(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unsigned_fix:DI
	  (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])

;; Unsigned conversion to SImode.

(define_expand "fixuns_trunc<mode>si2"
  [(parallel
    [(set (match_operand:SI 0 "register_operand")
	  (unsigned_fix:SI
	    (match_operand:MODEF 1 "nonimmediate_operand")))
     (use (match_dup 2))
     (clobber (match_scratch:<ssevecmode> 3))
     (clobber (match_scratch:<ssevecmode> 4))])]
  "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
{
  machine_mode mode = <MODE>mode;
  machine_mode vecmode = <ssevecmode>mode;
  REAL_VALUE_TYPE TWO31r;
  rtx two31;

  if (TARGET_AVX512F)
    {
      emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
      DONE;
    }

  if (optimize_insn_for_size_p ())
    FAIL;

  real_ldexp (&TWO31r, &dconst1, 31);
  two31 = const_double_from_real_value (TWO31r, mode);
  two31 = ix86_build_const_vector (vecmode, true, two31);
  operands[2] = force_reg (vecmode, two31);
})

(define_insn "fixuns_trunc<mode>si2_avx512f"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unsigned_fix:SI
	  (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])

(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unsigned_fix:SI
	    (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])

(define_insn_and_split "*fixuns_trunc<mode>_1"
  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
	(unsigned_fix:SI
	  (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
   (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
   && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  ix86_split_convert_uns_si_sse (operands);
  DONE;
})

;; Unsigned conversion to HImode.
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.

(define_expand "fixuns_trunc<mode>hi2"
  [(set (match_dup 2)
	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
   (set (match_operand:HI 0 "nonimmediate_operand")
	(subreg:HI (match_dup 2) 0))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "operands[2] = gen_reg_rtx (SImode);")

;; When SSE is available, it is always faster to use it!
(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
  "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "maybe_vex")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<SWI48:MODE>mode == DImode")
	  (const_string "1")
	  (const_string "*")))
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")])

;; Avoid vector decoded forms of the instruction.
(define_peephole2
  [(match_scratch:MODEF 2 "x")
   (set (match_operand:SWI48 0 "register_operand")
	(fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
  "TARGET_AVOID_VECTOR_DECODE
   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])

(define_insn "fix_trunc<mode>_i387_fisttp"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
	(fix:SWI248x (match_operand 1 "register_operand" "f")))
   (clobber (match_scratch:XF 2 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && TARGET_FISTTP
   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
	 && (TARGET_64BIT || <MODE>mode != DImode))
	&& TARGET_SSE_MATH)"
  "* return output_fix_trunc (insn, operands, true);"
  [(set_attr "type" "fisttp")
   (set_attr "mode" "<MODE>")])

;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control
;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG
;; clobbering insns can be used. Look at emit_i387_cw_initialization ()
;; function in i386.c.
(define_insn_and_split "*fix_trunc<mode>_i387_1"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
	(fix:SWI248x (match_operand 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
	 && (TARGET_64BIT || <MODE>mode != DImode))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_TRUNC] = 1;

  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);

  emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
				       operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])

(define_insn "fix_truncdi_i387"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
	(fix:DI (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))
   (clobber (match_scratch:XF 4 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "DI")])

(define_insn "fix_trunc<mode>_i387"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
	(fix:SWI24 (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])

(define_insn "x86_fnstcw_1"
  [(set (match_operand:HI 0 "memory_operand" "=m")
	(unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
  "TARGET_80387"
  "fnstcw\t%0"
  [(set (attr "length")
	(symbol_ref "ix86_attr_length_address_default (insn) + 2"))
   (set_attr "mode" "HI")
   (set_attr "unit" "i387")
   (set_attr "bdver1_decode" "vector")])

;; Conversion between fixed point and floating point.

;; Even though we only accept memory inputs, the backend _really_
;; wants to be able to do this between registers.  Thankfully, LRA
;; will fix this up for us during register allocation.

(define_insn "floathi<mode>2"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
	(float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
  "TARGET_80387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

(define_insn "float<SWI48x:mode>xf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "XF")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

(define_expand "float<SWI48x:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
	(float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
       && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")

(define_insn "*float<SWI48:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
	(float:MODEF
	  (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
  "@
   fild%Z1\t%1
   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "fmov,sseicvt,sseicvt")
   (set_attr "avx_partial_xmm_update" "false,true,true")
   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
   (set_attr "mode" "<MODEF:MODE>")
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "prefix" "maybe_vex")
	    (match_test "<SWI48:MODE>mode == DImode"))
       (const_string "1")
       (const_string "*")))
   (set_attr "unit" "i387,*,*")
   (set_attr "athlon_decode" "*,double,direct")
   (set_attr "amdfam10_decode" "*,vector,double")
   (set_attr "bdver1_decode" "*,double,direct")
   (set_attr "znver1_decode" "double,*,*")
   (set_attr "fp_int_src" "true")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "TARGET_MIX_SSE_I387
		      && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
					   <SWI48:MODE>mode)")
	 (symbol_ref "true"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "true")
	 (symbol_ref "false"))))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
	   (symbol_ref "true")))])

(define_insn "*floatdi<MODEF:mode>2_i387"
  [(set (match_operand:MODEF 0 "register_operand" "=f")
	(float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
  "!TARGET_64BIT
   && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
;; alternative in sse2_loadld.
(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
	(float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
  "TARGET_SSE2
   && TARGET_USE_VECTOR_CONVERTS
   && optimize_function_for_speed_p (cfun)
   && reload_completed
   && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(const_int 0)]
{
  operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
  operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);

  emit_insn (gen_sse2_loadld (operands[4],
			      CONST0_RTX (V4SImode), operands[1]));

  if (<ssevecmode>mode == V4SFmode)
    emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
  else
    emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
  DONE;
})

;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.  */

(define_split
  [(set (match_operand:X87MODEF 0 "register_operand")
	(float:X87MODEF
	  (match_operand:DI 1 "register_operand")))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
   && can_create_pseudo_p ()"
  [(const_int 0)]
{
  emit_insn (gen_floatdi<mode>2_i387_with_xmm
	     (operands[0], operands[1],
	      assign_386_stack_local (DImode, SLOT_TEMP)));
  DONE;
})

(define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
	(float:X87MODEF
	  (match_operand:DI 1 "register_operand" "r,r")))
   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
   (clobber (match_scratch:V4SI 3 "=x,x"))
   (clobber (match_scratch:V4SI 4 "=X,x"))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 3))
   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
{
  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
     Assemble the 64-bit DImode value in an xmm register.  */
  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
			      gen_lowpart (SImode, operands[1])));
  if (TARGET_SSE4_1)
    emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
				  gen_highpart (SImode, operands[1]),
				  GEN_INT (2)));
  else
    {
      emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
				  gen_highpart (SImode, operands[1])));
      emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
					     operands[4]));
    }
  operands[3] = gen_lowpart (DImode, operands[3]);
}
  [(set_attr "isa" "sse4,*")
   (set_attr "type" "multi")
   (set_attr "mode" "<X87MODEF:MODE>")
   (set_attr "unit" "i387")
   (set_attr "fp_int_src" "true")])

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore

(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
	(vec_merge:<MODEF:ssevecmode>
	  (vec_duplicate:<MODEF:ssevecmode>
	    (float:MODEF
	      (match_dup 1)))
	  (match_dup 0)
	  (const_int 1)))]
{
  const machine_mode vmode = <MODEF:ssevecmode>mode;

  operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
  emit_move_insn (operands[0], CONST0_RTX (vmode));
})

(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
	(unsigned_float:MODEF
	  (match_operand:SWI12 1 "nonimmediate_operand")))]
  "!TARGET_64BIT
   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
{
  operands[1] = convert_to_mode (SImode, operands[1], 1);
  emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
  DONE;
})

(define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
  [(set (match_operand:MODEF 0 "register_operand" "=v")
	(unsigned_float:MODEF
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "avx_partial_xmm_update" "true")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODEF:MODE>")])

;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack. Also note that fild loads from memory only.

(define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
	(unsigned_float:X87MODEF
	  (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_operand:DI 2 "memory_operand" "=m"))
   (clobber (match_scratch:DI 3 "=x"))]
  "!TARGET_64BIT
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 0)
	(float:X87MODEF (match_dup 2)))]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_expand "floatunssi<mode>2"
  [(set (match_operand:X87MODEF 0 "register_operand")
	(unsigned_float:X87MODEF
	  (match_operand:SI 1 "nonimmediate_operand")))]
  "(!TARGET_64BIT
    && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
   || ((!TARGET_64BIT || TARGET_AVX512F)
       && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
    {
      emit_insn (gen_floatunssi<mode>2_i387_with_xmm
		  (operands[0], operands[1],
		   assign_386_stack_local (DImode, SLOT_TEMP)));
      DONE;
    }
  if (!TARGET_AVX512F)
    {
      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
      DONE;
    }
})

(define_expand "floatunsdisf2"
  [(set (match_operand:SF 0 "register_operand")
	(unsigned_float:SF
	  (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
{
  if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})

(define_expand "floatunsdidf2"
  [(set (match_operand:DF 0 "register_operand")
	(unsigned_float:DF
	  (match_operand:DI 1 "nonimmediate_operand")))]
  "((TARGET_64BIT && TARGET_AVX512F)
    || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
   && TARGET_SSE2 && TARGET_SSE_MATH"
{
  if (!TARGET_64BIT)
    {
      ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
      DONE;
    }
  if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})

;; Load effective address instructions

(define_insn_and_split "*lea<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
  ""
{
  if (SImode_address_operand (operands[1], VOIDmode))
    {
      gcc_assert (TARGET_64BIT);
      return "lea{l}\t{%E1, %k0|%k0, %E1}";
    }
  else 
    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
}
  "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
  [(const_int 0)]
{
  machine_mode mode = <MODE>mode;
  rtx pat;

  /* ix86_avoid_lea_for_addr re-recognizes insn and may
     change operands[] array behind our back.  */
  pat = PATTERN (curr_insn);

  operands[0] = SET_DEST (pat);
  operands[1] = SET_SRC (pat);

  /* Emit all operations in SImode for zero-extended addresses.  */
  if (SImode_address_operand (operands[1], VOIDmode))
    mode = SImode;

  ix86_split_lea_for_addr (curr_insn, operands, mode);

  /* Zero-extend return register to DImode for zero-extended addresses.  */
  if (mode != <MODE>mode)
    emit_insn (gen_zero_extendsidi2
    	       (operands[0], gen_lowpart (mode, operands[0])));

  DONE;
}
  [(set_attr "type" "lea")
   (set (attr "mode")
     (if_then_else
       (match_operand 1 "SImode_address_operand")
       (const_string "SI")
       (const_string "<MODE>")))])

;; Add instructions

(define_expand "add<mode>3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
	(plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
		    (match_operand:SDWIM 2 "<general_hilo_operand>")))]
  ""
  "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")

(define_insn_and_split "*add<dwi>3_doubleword"
  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
	(plus:<DWI>
	  (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
	  (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (plus:DWIH (match_dup 1) (match_dup 2))
		     (match_dup 1)))
	      (set (match_dup 0)
		   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
		   (plus:DWIH
		     (plus:DWIH
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
		       (match_dup 4))
		     (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3]);
      DONE;
    }
})

(define_insn "*add<mode>_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
	(plus:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
	  (match_operand:SWI48 2 "x86_64_general_operand" "re,m,0,le")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
	}

    default:
      /* For most processors, ADD is faster than LEA.  This alternative
	 was added to use ADD as much as possible.  */
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);
        
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "3")
              (const_string "lea")
	    (match_operand:SWI48 2 "incdec_operand")
	      (const_string "incdec")
	   ]
	   (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "<MODE>")])

;; It may seem that nonimmediate operand is proper one for operand 1.
;; The addsi_1 pattern allows nonimmediate operand at that place and
;; we take care in ix86_binary_operator_ok to not allow two memory
;; operands so proper swapping will be done in reload.  This allow
;; patterns constructed from addsi_1 to match.

(define_insn "addsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
	(zero_extend:DI
	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
		   (match_operand:SI 2 "x86_64_general_operand" "rme,0,le"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{l}\t%k0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{l}\t%k0";
	}

    default:
      /* For most processors, ADD is faster than LEA.  This alternative
	 was added to use ADD as much as possible.  */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return "sub{l}\t{%2, %k0|%k0, %2}";

      return "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "2")
	      (const_string "lea")
	    (match_operand:SI 2 "incdec_operand")
	      (const_string "incdec")
	   ]
	   (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "SI")])

(define_insn "*addhi_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
		 (match_operand:HI 2 "general_operand" "rn,m,0,ln")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, HImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (operands[2] == const1_rtx)
	return "inc{w}\t%0";
      else
	{
	  gcc_assert (operands[2] == constm1_rtx);
	  return "dec{w}\t%0";
	}

    default:
      /* For most processors, ADD is faster than LEA.  This alternative
	 was added to use ADD as much as possible.  */
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);

      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], HImode))
	return "sub{w}\t{%2, %0|%0, %2}";

      return "add{w}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "3")
              (const_string "lea")
	    (match_operand:HI 2 "incdec_operand")
	      (const_string "incdec")
	   ]
	   (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "HI,HI,HI,SI")])

(define_insn "*addqi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, QImode, operands)"
{
  bool widen = (get_attr_mode (insn) != MODE_QI);

  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (operands[2] == const1_rtx)
	return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
      else
	{
	  gcc_assert (operands[2] == constm1_rtx);
	  return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
	}

    default:
      /* For most processors, ADD is faster than LEA.  These alternatives
	 were added to use ADD as much as possible.  */
      if (which_alternative == 2 || which_alternative == 4)
        std::swap (operands[1], operands[2]);

      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], QImode))
	{
	  if (widen)
	    return "sub{l}\t{%2, %k0|%k0, %2}";
	  else
	    return "sub{b}\t{%2, %0|%0, %2}";
	}
      if (widen)
        return "add{l}\t{%k2, %k0|%k0, %k2}";
      else
        return "add{b}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "5")
              (const_string "lea")
	    (match_operand:QI 2 "incdec_operand")
	      (const_string "incdec")
	   ]
	   (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "QI,QI,QI,SI,SI,SI")
   ;; Potential partial reg stall on alternatives 3 and 4.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "3,4")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*add<mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0")
		    (match_operand:SWI12 2 "general_operand" "<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && (rtx_equal_p (operands[0], operands[1])
       || rtx_equal_p (operands[0], operands[2]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
	return "inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[2] == constm1_rtx);
	  return "dec{<imodesuffix>}\t%0";
	}

    default:
      if (x86_maybe_negate_const_int (&operands[2], QImode))
	return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:QI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set_attr "mode" "<MODE>")])

;; Split non destructive adds if we cannot use lea.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(plus:SWI48 (match_operand:SWI48 1 "register_operand")
		    (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])])

;; Split non destructive adds if we cannot use lea.
(define_split
  [(set (match_operand:DI 0 "register_operand")
  	(zero_extend:DI
	  (plus:SI (match_operand:SI 1 "register_operand")
		   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0)
   	     	   (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = gen_lowpart (SImode, operands[0]);")

;; Convert add to the lea pattern to avoid flags dependency.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
	(plus:SWI (match_operand:SWI 1 "register_operand")
		  (match_operand:SWI 2 "<nonmemory_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_lea_for_add_ok (insn, operands)" 
  [(set (match_dup 0)
	(plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
  if (<MODE>mode != <LEAMODE>mode)
    {
      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
    }
})

;; Convert add to the lea pattern to avoid flags dependency.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (plus:SI (match_operand:SI 1 "register_operand")
		   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])

(define_insn "*add<mode>_2"
  [(set (reg FLAGS_REG)
	(compare
	  (plus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
	    (match_operand:SWI 2 "<general_operand>" "<r><i>,m,0"))
	  (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>")
	(plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
	}

    default:
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);
        
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "<MODE>")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*addsi_2_zext"
  [(set (reg FLAGS_REG)
	(compare
	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
	  (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{l}\t%k0";
      else
	{
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{l}\t%k0";
	}

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return "sub{l}\t{%2, %k0|%k0, %2}";

      return "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:SI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "SI")])

(define_insn "*add<mode>_3"
  [(set (reg FLAGS_REG)
	(compare
	  (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0"))
	  (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")))
   (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
  "ix86_match_ccmode (insn, CCZmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
	}

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "<MODE>")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*addsi_3_zext"
  [(set (reg FLAGS_REG)
	(compare
	  (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
	  (match_operand:SI 1 "nonimmediate_operand" "%0,r")))
   (set (match_operand:DI 0 "register_operand" "=r,r")
	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{l}\t%k0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{l}\t%k0";
	}

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return "sub{l}\t{%2, %k0|%k0, %2}";

      return "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:SI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "SI")])

; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec as done by peephole2.  This pattern
; is matched then.  We can't accept general immediate, because for
; case of overflows,  the result is messed up.
; Also carry flag is reversed compared to cmp, so this conversion is valid
; only for comparisons not depending on it.

(define_insn "*adddi_4"
  [(set (reg FLAGS_REG)
	(compare
	  (match_operand:DI 1 "nonimmediate_operand" "0")
	  (match_operand:DI 2 "x86_64_immediate_operand" "e")))
   (clobber (match_scratch:DI 0 "=rm"))]
  "TARGET_64BIT
   && ix86_match_ccmode (insn, CCGCmode)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == constm1_rtx)
        return "inc{q}\t%0";
      else
        {
	  gcc_assert (operands[2] == const1_rtx);
          return "dec{q}\t%0";
	}

    default:
      if (x86_maybe_negate_const_int (&operands[2], DImode))
	return "add{q}\t{%2, %0|%0, %2}";

      return "sub{q}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:DI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "DI")])

; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec as done by peephole2.  This pattern
; is matched then.  We can't accept general immediate, because for
; case of overflows,  the result is messed up.
; Also carry flag is reversed compared to cmp, so this conversion is valid
; only for comparisons not depending on it.

(define_insn "*add<mode>_4"
  [(set (reg FLAGS_REG)
	(compare
	  (match_operand:SWI124 1 "nonimmediate_operand" "0")
	  (match_operand:SWI124 2 "const_int_operand" "n")))
   (clobber (match_scratch:SWI124 0 "=<r>m"))]
  "ix86_match_ccmode (insn, CCGCmode)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == constm1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
	  gcc_assert (operands[2] == const1_rtx);
          return "dec{<imodesuffix>}\t%0";
	}

    default:
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
	return "add{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:<MODE> 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "*add<mode>_5"
  [(set (reg FLAGS_REG)
	(compare
	  (plus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
	    (match_operand:SWI 2 "<general_operand>" "<g>,0"))
	  (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
	}

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (if_then_else (match_operand:SWI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
	(const_string "1")
	(const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "addqi_ext_1"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (plus:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   rtx_equal_p (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
	return "inc{b}\t%h0";
      else
        {
	  gcc_assert (operands[2] == constm1_rtx);
          return "dec{b}\t%h0";
        }

    default:
      return "add{b}\t{%2, %h0|%h0, %2}";
    }
}
  [(set_attr "isa" "*,nox64")
   (set (attr "type")
     (if_then_else (match_operand:QI 2 "incdec_operand")
	(const_string "incdec")
	(const_string "alu")))
   (set_attr "mode" "QI")])

(define_insn "*addqi_ext_2"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (plus:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "%0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (subreg:QI
	      (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
			       (const_int 8)
			       (const_int 8)) 0)) 0))
  (clobber (reg:CC FLAGS_REG))]
  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   rtx_equal_p (operands[0], operands[1])
   || rtx_equal_p (operands[0], operands[2])"
  "add{b}\t{%h2, %h0|%h0, %h2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Like DWI, but use POImode instead of OImode.
(define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])

;; Add with jump on overflow.
(define_expand "addv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (plus:<DPWI>
		       (sign_extend:<DPWI>
			 (match_operand:SWIDWI 1 "nonimmediate_operand"))
		       (match_dup 4))
			 (sign_extend:<DPWI>
			   (plus:SWIDWI (match_dup 1)
			     (match_operand:SWIDWI 2
			       "<general_hilo_operand>")))))
	      (set (match_operand:SWIDWI 0 "register_operand")
		   (plus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
  if (CONST_SCALAR_INT_P (operands[2]))
    operands[4] = operands[2];
  else
    operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
})

(define_insn "*addv<mode>4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (plus:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
		   (sign_extend:<DWI>
		      (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
		(sign_extend:<DWI>
		   (plus:SWI (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "addv<mode>4_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (plus:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
		   (match_operand:<DWI> 3 "const_int_operand" "i"))
		(sign_extend:<DWI>
		   (plus:SWI
		     (match_dup 1)
		     (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
		  (const_string "1")
	       (match_test "<MODE_SIZE> == 8")
		  (const_string "4")]
	      (const_string "<MODE_SIZE>")))])

;; Quad word integer modes as mode attribute.
(define_mode_attr QPWI [(SI "TI") (DI "POI")])

(define_insn_and_split "*addv<dwi>4_doubleword"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (plus:<QPWI>
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0"))
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
	  (sign_extend:<QPWI>
	    (plus:<DWI> (match_dup 1) (match_dup 2)))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
	(plus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (plus:DWIH (match_dup 1) (match_dup 2))
		     (match_dup 1)))
	      (set (match_dup 0)
		   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (plus:<DWI>
		       (plus:<DWI>
			 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
			 (sign_extend:<DWI> (match_dup 4)))
		       (sign_extend:<DWI> (match_dup 5)))
		     (sign_extend:<DWI>
		       (plus:DWIH
			 (plus:DWIH
			   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
			   (match_dup 4))
			 (match_dup 5)))))
	      (set (match_dup 3)
		   (plus:DWIH
		     (plus:DWIH
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
		       (match_dup 4))
		     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
})

(define_insn_and_split "*addv<dwi>4_doubleword_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (plus:<QPWI>
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 1 "nonimmediate_operand" "%0"))
	    (match_operand:<QPWI> 3 "const_scalar_int_operand" ""))
	  (sign_extend:<QPWI>
	    (plus:<DWI>
	      (match_dup 1)
	      (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
	(plus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)
   && CONST_SCALAR_INT_P (operands[2])
   && rtx_equal_p (operands[2], operands[3])"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (plus:DWIH (match_dup 1) (match_dup 2))
		     (match_dup 1)))
	      (set (match_dup 0)
		   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (plus:<DWI>
		       (plus:<DWI>
			 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
			 (sign_extend:<DWI> (match_dup 4)))
		       (match_dup 5))
		     (sign_extend:<DWI>
		       (plus:DWIH
			 (plus:DWIH
			   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
			   (match_dup 4))
			 (match_dup 5)))))
	      (set (match_dup 3)
		   (plus:DWIH
		     (plus:DWIH
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
		       (match_dup 4))
		     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
				    operands[5]));
      DONE;
    }
})

(define_insn "*addv<mode>4_overflow_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (plus:<DWI>
	    (plus:<DWI>
	      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
		[(match_operand 3 "flags_reg_operand") (const_int 0)])
	      (sign_extend:<DWI>
		(match_operand:SWI 1 "nonimmediate_operand" "%0,0")))
	    (sign_extend:<DWI>
	      (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
	  (sign_extend:<DWI>
	    (plus:SWI
	      (plus:SWI
		(match_operator:SWI 5 "ix86_carry_flag_operator"
		  [(match_dup 3) (const_int 0)])
		(match_dup 1))
	      (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
	(plus:SWI
	  (plus:SWI
	    (match_op_dup 5 [(match_dup 3) (const_int 0)])
	    (match_dup 1))
	  (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*addv<mode>4_overflow_2"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (plus:<DWI>
	    (plus:<DWI>
	      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
		[(match_operand 3 "flags_reg_operand") (const_int 0)])
	      (sign_extend:<DWI>
		(match_operand:SWI 1 "nonimmediate_operand" "%0")))
	    (match_operand:<DWI> 6 "const_int_operand" ""))
	  (sign_extend:<DWI>
	    (plus:SWI
	      (plus:SWI
		(match_operator:SWI 5 "ix86_carry_flag_operator"
		  [(match_dup 3) (const_int 0)])
		(match_dup 1))
	      (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
	(plus:SWI
	  (plus:SWI
	    (match_op_dup 5 [(match_dup 3) (const_int 0)])
	    (match_dup 1))
	  (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[6])"
  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
     (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
       (const_string "1")
       (const_string "4")))])

(define_expand "uaddv<mode>4"
  [(parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (plus:SWIDWI
		       (match_operand:SWIDWI 1 "nonimmediate_operand")
		       (match_operand:SWIDWI 2 "<general_hilo_operand>"))
		     (match_dup 1)))
	      (set (match_operand:SWIDWI 0 "register_operand")
		   (plus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (ltu (reg:CCC FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")

;; The lea patterns for modes less than 32 bits need to be matched by
;; several insns converted to real lea by splitters.

(define_insn_and_split "*lea<mode>_general_1"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(plus:SWI12
	  (plus:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
		      (match_operand:SWI12 2 "register_operand" "r"))
	  (match_operand:SWI12 3 "immediate_operand" "i")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (plus:SI (match_dup 1) (match_dup 2))
	  (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_2"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(plus:SWI12
	  (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
		      (match_operand 2 "const248_operand" "n"))
	  (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (mult:SI (match_dup 1) (match_dup 2))
	  (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_2b"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(plus:SWI12
	  (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
			(match_operand 2 "const123_operand" "n"))
	  (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (ashift:SI (match_dup 1) (match_dup 2))
	  (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_3"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(plus:SWI12
	  (plus:SWI12
	    (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
			(match_operand 2 "const248_operand" "n"))
	    (match_operand:SWI12 3 "register_operand" "r"))
	  (match_operand:SWI12 4 "immediate_operand" "i")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (plus:SI
	    (mult:SI (match_dup 1) (match_dup 2))
	    (match_dup 3))
	  (match_dup 4)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
  operands[4] = gen_lowpart (SImode, operands[4]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_3b"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(plus:SWI12
	  (plus:SWI12
	    (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
			  (match_operand 2 "const123_operand" "n"))
	    (match_operand:SWI12 3 "register_operand" "r"))
	  (match_operand:SWI12 4 "immediate_operand" "i")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (plus:SI
	    (ashift:SI (match_dup 1) (match_dup 2))
	    (match_dup 3))
	  (match_dup 4)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
  operands[4] = gen_lowpart (SImode, operands[4]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_4"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(any_or:SWI12
	  (ashift:SWI12
	    (match_operand:SWI12 1 "index_register_operand" "l")
	    (match_operand 2 "const_0_to_3_operand" "n"))
	  (match_operand 3 "const_int_operand" "n")))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
       < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SI
	  (mult:SI (match_dup 1) (match_dup 2))
	  (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

(define_insn_and_split "*lea<mode>_general_4"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(any_or:SWI48
	  (ashift:SWI48
	    (match_operand:SWI48 1 "index_register_operand" "l")
	    (match_operand 2 "const_0_to_3_operand" "n"))
	  (match_operand 3 "const_int_operand" "n")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(plus:SWI48
	  (mult:SWI48 (match_dup 1) (match_dup 2))
	  (match_dup 3)))]
  "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
  [(set_attr "type" "lea")
   (set_attr "mode" "<MODE>")])

;; Subtract instructions

(define_expand "sub<mode>3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
	(minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
		     (match_operand:SDWIM 2 "<general_hilo_operand>")))]
  ""
  "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")

(define_insn_and_split "*sub<dwi>3_doubleword"
  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
	(minus:<DWI>
	  (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
	  (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CC FLAGS_REG)
		   (compare:CC (match_dup 1) (match_dup 2)))
	      (set (match_dup 0)
		   (minus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
		   (minus:DWIH
		     (minus:DWIH
		       (match_dup 4)
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
      DONE;
    }
})

(define_insn "*sub<mode>_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(minus:SWI
	  (match_operand:SWI 1 "nonimmediate_operand" "0,0")
	  (match_operand:SWI 2 "<general_operand>" "<r><i>,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*subsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (minus:SI (match_operand:SI 1 "register_operand" "0")
		    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
  "sub{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*sub<mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(minus:SWI12 (match_operand:SWI12 1 "register_operand" "0")
		     (match_operand:SWI12 2 "general_operand" "<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*sub<mode>_2"
  [(set (reg FLAGS_REG)
	(compare
	  (minus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "0,0")
	    (match_operand:SWI 2 "<general_operand>" "<r><i>,m"))
	  (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(minus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*subsi_2_zext"
  [(set (reg FLAGS_REG)
	(compare
	  (minus:SI (match_operand:SI 1 "register_operand" "0")
		    (match_operand:SI 2 "x86_64_general_operand" "rme"))
	  (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (minus:SI (match_dup 1)
		    (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (MINUS, SImode, operands)"
  "sub{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

;; Subtract with jump on overflow.
(define_expand "subv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (minus:<DPWI>
		       (sign_extend:<DPWI>
			 (match_operand:SWIDWI 1 "nonimmediate_operand"))
		       (match_dup 4))
		     (sign_extend:<DPWI>
		       (minus:SWIDWI (match_dup 1)
				     (match_operand:SWIDWI 2
						"<general_hilo_operand>")))))
	      (set (match_operand:SWIDWI 0 "register_operand")
		   (minus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
{
  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
  if (CONST_SCALAR_INT_P (operands[2]))
    operands[4] = operands[2];
  else
    operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
})

(define_insn "*subv<mode>4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (minus:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
		   (sign_extend:<DWI>
		      (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
		(sign_extend:<DWI>
		   (minus:SWI (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(minus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "subv<mode>4_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (minus:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
		   (match_operand:<DWI> 3 "const_int_operand" "i"))
		(sign_extend:<DWI>
		   (minus:SWI
		     (match_dup 1)
		     (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(minus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
		  (const_string "1")
	       (match_test "<MODE_SIZE> == 8")
		  (const_string "4")]
	      (const_string "<MODE_SIZE>")))])

(define_insn_and_split "*subv<dwi>4_doubleword"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (minus:<QPWI>
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
	  (sign_extend:<QPWI>
	    (minus:<DWI> (match_dup 1) (match_dup 2)))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
	(minus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CC FLAGS_REG)
		   (compare:CC (match_dup 1) (match_dup 2)))
	      (set (match_dup 0)
		   (minus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (minus:<DWI>
		       (minus:<DWI>
			 (sign_extend:<DWI> (match_dup 4))
			 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
		       (sign_extend:<DWI> (match_dup 5)))
		     (sign_extend:<DWI>
		       (minus:DWIH
			 (minus:DWIH
			   (match_dup 4)
			   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
			 (match_dup 5)))))
	      (set (match_dup 3)
		   (minus:DWIH
		     (minus:DWIH
		       (match_dup 4)
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
})

(define_insn_and_split "*subv<dwi>4_doubleword_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (minus:<QPWI>
	    (sign_extend:<QPWI>
	      (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
	    (match_operand:<QPWI> 3 "const_scalar_int_operand" ""))
	  (sign_extend:<QPWI>
	    (minus:<DWI>
	      (match_dup 1)
	      (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
	(minus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && CONST_SCALAR_INT_P (operands[2])
   && rtx_equal_p (operands[2], operands[3])"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CC FLAGS_REG)
		   (compare:CC (match_dup 1) (match_dup 2)))
	      (set (match_dup 0)
		   (minus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO
		     (minus:<DWI>
		       (minus:<DWI>
			 (sign_extend:<DWI> (match_dup 4))
			 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
		       (match_dup 5))
		     (sign_extend:<DWI>
		       (minus:DWIH
			 (minus:DWIH
			   (match_dup 4)
			   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
			 (match_dup 5)))))
	      (set (match_dup 3)
		   (minus:DWIH
		     (minus:DWIH
		       (match_dup 4)
		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
				    operands[5]));
      DONE;
    }
})

(define_insn "*subv<mode>4_overflow_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (minus:<DWI>
	    (minus:<DWI>
	      (sign_extend:<DWI>
		(match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
	      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
		[(match_operand 3 "flags_reg_operand") (const_int 0)]))
	    (sign_extend:<DWI>
	      (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
	  (sign_extend:<DWI>
	    (minus:SWI
	      (minus:SWI
		(match_dup 1)
		(match_operator:SWI 5 "ix86_carry_flag_operator"
		  [(match_dup 3) (const_int 0)]))
	      (match_dup 2)))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
	(minus:SWI
	  (minus:SWI
	    (match_dup 1)
	    (match_op_dup 5 [(match_dup 3) (const_int 0)]))
	  (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*subv<mode>4_overflow_2"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO
	  (minus:<DWI>
	    (minus:<DWI>
	      (sign_extend:<DWI>
		(match_operand:SWI 1 "nonimmediate_operand" "%0"))
	      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
		[(match_operand 3 "flags_reg_operand") (const_int 0)]))
	    (match_operand:<DWI> 6 "const_int_operand" ""))
	  (sign_extend:<DWI>
	    (minus:SWI
	      (minus:SWI
		(match_dup 1)
		(match_operator:SWI 5 "ix86_carry_flag_operator"
		  [(match_dup 3) (const_int 0)]))
	      (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
	(minus:SWI
	  (minus:SWI
	    (match_dup 1)
	    (match_op_dup 5 [(match_dup 3) (const_int 0)]))
	  (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[6])"
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
     (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
       (const_string "1")
       (const_string "4")))])

(define_expand "usubv<mode>4"
  [(parallel [(set (reg:CC FLAGS_REG)
		   (compare:CC
		     (match_operand:SWI 1 "nonimmediate_operand")
		     (match_operand:SWI 2 "<general_operand>")))
	      (set (match_operand:SWI 0 "register_operand")
		   (minus:SWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (ltu (reg:CC FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")

(define_insn "*sub<mode>_3"
  [(set (reg FLAGS_REG)
	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
		 (match_operand:SWI 2 "<general_operand>" "<r><i>,m")))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(minus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCmode)
   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_peephole2
  [(parallel
     [(set (reg:CC FLAGS_REG)
	   (compare:CC (match_operand:SWI 0 "general_reg_operand")
		       (match_operand:SWI 1 "general_gr_operand")))
      (set (match_dup 0)
	   (minus:SWI (match_dup 0) (match_dup 1)))])]
  "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 0) (match_dup 1)))])

;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
;; subl $1, %eax; jnc .Lxx;
(define_peephole2
  [(parallel
     [(set (match_operand:SWI 0 "general_reg_operand")
	   (plus:SWI (match_dup 0) (const_int -1)))
      (clobber (reg FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0) (const_int -1)))
   (set (pc)
	(if_then_else (match_operator 1 "bt_comparison_operator"
			[(reg:CCZ FLAGS_REG) (const_int 0)])
		      (match_operand 2)
		      (pc)))]
   "peep2_regno_dead_p (3, FLAGS_REG)"
   [(parallel
      [(set (reg:CC FLAGS_REG)
	    (compare:CC (match_dup 0) (const_int 1)))
       (set (match_dup 0)
	    (minus:SWI (match_dup 0) (const_int 1)))])
    (set (pc)
	 (if_then_else (match_dup 3)
		       (match_dup 2)
		       (pc)))]
{
  rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
  operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
				? GEU : LTU, VOIDmode, cc, const0_rtx);
})

(define_insn "*subsi_3_zext"
  [(set (reg FLAGS_REG)
	(compare (match_operand:SI 1 "register_operand" "0")
		 (match_operand:SI 2 "x86_64_general_operand" "rme")))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (minus:SI (match_dup 1)
		    (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
   && ix86_binary_operator_ok (MINUS, SImode, operands)"
  "sub{l}\t{%2, %1|%1, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

;; Add with carry and subtract with borrow

(define_insn "@add<mode>3_carry"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(plus:SWI
	  (plus:SWI
	    (match_operator:SWI 4 "ix86_carry_flag_operator"
	     [(match_operand 3 "flags_reg_operand") (const_int 0)])
	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
	  (match_operand:SWI 2 "<general_operand>" "<r><i>,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_insn "*add<mode>3_carry_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(plus:SWI
	  (match_operator:SWI 3 "ix86_carry_flag_operator"
	    [(match_operand 2 "flags_reg_operand") (const_int 0)])
	  (match_operand:SWI 1 "nonimmediate_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)"
  "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_insn "*addsi3_carry_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (plus:SI
	    (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
		      [(reg FLAGS_REG) (const_int 0)])
		     (match_operand:SI 1 "register_operand" "%0"))
	    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
  "adc{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "SI")])

(define_insn "*addsi3_carry_zext_0"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
		    [(reg FLAGS_REG) (const_int 0)])
		   (match_operand:SI 1 "register_operand" "0"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "adc{l}\t{$0, %k0|%k0, 0}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "SI")])

;; There is no point to generate ADCX instruction. ADC is shorter and faster.

(define_insn "addcarry<mode>"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extend:<DWI>
	    (plus:SWI48
	      (plus:SWI48
		(match_operator:SWI48 5 "ix86_carry_flag_operator"
		  [(match_operand 3 "flags_reg_operand") (const_int 0)])
		(match_operand:SWI48 1 "nonimmediate_operand" "%0,0"))
	      (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))
	  (plus:<DWI>
	    (zero_extend:<DWI> (match_dup 2))
	    (match_operator:<DWI> 4 "ix86_carry_flag_operator"
	      [(match_dup 3) (const_int 0)]))))
   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
	(plus:SWI48 (plus:SWI48 (match_op_dup 5
				 [(match_dup 3) (const_int 0)])
				(match_dup 1))
		    (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_expand "addcarry<mode>_0"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
	   (compare:CCC
	     (plus:SWI48
	       (match_operand:SWI48 1 "nonimmediate_operand")
	       (match_operand:SWI48 2 "x86_64_general_operand"))
	     (match_dup 1)))
      (set (match_operand:SWI48 0 "nonimmediate_operand")
	   (plus:SWI48 (match_dup 1) (match_dup 2)))])]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")

(define_insn "*addcarry<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extend:<DWI>
	    (plus:SWI48
	      (plus:SWI48
		(match_operator:SWI48 5 "ix86_carry_flag_operator"
		  [(match_operand 3 "flags_reg_operand") (const_int 0)])
		(match_operand:SWI48 1 "nonimmediate_operand" "%0"))
	      (match_operand:SWI48 2 "x86_64_immediate_operand" "e")))
	  (plus:<DWI>
	    (match_operand:<DWI> 6 "const_scalar_int_operand" "")
	    (match_operator:<DWI> 4 "ix86_carry_flag_operator"
	      [(match_dup 3) (const_int 0)]))))
   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
	(plus:SWI48 (plus:SWI48 (match_op_dup 5
				 [(match_dup 3) (const_int 0)])
				(match_dup 1))
		    (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
   && CONST_INT_P (operands[2])
   /* Check that operands[6] is operands[2] zero extended from
      <MODE>mode to <DWI>mode.  */
   && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
       ? (CONST_INT_P (operands[6])
	  && UINTVAL (operands[6]) == (UINTVAL (operands[2])
				       & GET_MODE_MASK (<MODE>mode)))
       : (CONST_WIDE_INT_P (operands[6])
	  && CONST_WIDE_INT_NUNITS (operands[6]) == 2
	  && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
	      == UINTVAL (operands[2]))
	  && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
     (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
       (const_string "1")
       (const_string "4")))])

(define_insn "@sub<mode>3_carry"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(minus:SWI
	  (minus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "0,0")
	    (match_operator:SWI 4 "ix86_carry_flag_operator"
	     [(match_operand 3 "flags_reg_operand") (const_int 0)]))
	  (match_operand:SWI 2 "<general_operand>" "<r><i>,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_insn "*sub<mode>3_carry_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(minus:SWI
	  (match_operand:SWI 1 "nonimmediate_operand" "0")
	  (match_operator:SWI 3 "ix86_carry_flag_operator"
	    [(match_operand 2 "flags_reg_operand") (const_int 0)])))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)"
  "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_insn "*subsi3_carry_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (minus:SI
	    (minus:SI
	      (match_operand:SI 1 "register_operand" "0")
	      (match_operator:SI 3 "ix86_carry_flag_operator"
	       [(reg FLAGS_REG) (const_int 0)]))
	    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
  "sbb{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "SI")])

(define_insn "*subsi3_carry_zext_0"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (minus:SI
	    (match_operand:SI 1 "register_operand" "0")
	    (match_operator:SI 2 "ix86_carry_flag_operator"
	      [(reg FLAGS_REG) (const_int 0)]))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "sbb{l}\t{$0, %k0|%k0, 0}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "SI")])

(define_insn "@sub<mode>3_carry_ccc"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
	  (plus:<DWI>
	    (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
	    (zero_extend:<DWI>
	      (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
   (clobber (match_scratch:DWIH 0 "=r"))]
  ""
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*sub<mode>3_carry_ccc_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
	  (plus:<DWI>
	    (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
	    (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
   (clobber (match_scratch:DWIH 0 "=r"))]
  ""
{
  operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
  return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
}
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

;; The sign flag is set from the
;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
;; result, the overflow flag likewise, but the overflow flag is also
;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
(define_insn "@sub<mode>3_carry_ccgz"
  [(set (reg:CCGZ FLAGS_REG)
	(unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
		      (match_operand:DWIH 2 "x86_64_general_operand" "rme")
		      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
		     UNSPEC_SBB))
   (clobber (match_scratch:DWIH 0 "=r"))]
  ""
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "subborrow<mode>"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extend:<DWI>
	    (match_operand:SWI48 1 "nonimmediate_operand" "0"))
	  (plus:<DWI>
	    (match_operator:<DWI> 4 "ix86_carry_flag_operator"
	      [(match_operand 3 "flags_reg_operand") (const_int 0)])
	    (zero_extend:<DWI>
	      (match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
   (set (match_operand:SWI48 0 "register_operand" "=r")
	(minus:SWI48 (minus:SWI48
		       (match_dup 1)
		       (match_operator:SWI48 5 "ix86_carry_flag_operator"
			 [(match_dup 3) (const_int 0)]))
		     (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")])

(define_expand "subborrow<mode>_0"
  [(parallel
     [(set (reg:CC FLAGS_REG)
	   (compare:CC
	     (match_operand:SWI48 1 "nonimmediate_operand")
	     (match_operand:SWI48 2 "<general_operand>")))
      (set (match_operand:SWI48 0 "register_operand")
	   (minus:SWI48 (match_dup 1) (match_dup 2)))])]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")

;; Overflow setting add instructions

(define_expand "addqi3_cconly_overflow"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
	   (compare:CCC
	     (plus:QI
	       (match_operand:QI 0 "nonimmediate_operand")
	       (match_operand:QI 1 "general_operand"))
	     (match_dup 0)))
      (clobber (match_scratch:QI 2))])]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))")

(define_insn "*add<mode>3_cconly_overflow_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (plus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "%0")
	    (match_operand:SWI 2 "<general_operand>" "<g>"))
	  (match_dup 1)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*add<mode>3_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	    (plus:SWI
		(match_operand:SWI 1 "nonimmediate_operand" "%0,0")
		(match_operand:SWI 2 "<general_operand>" "<r><i>,m"))
	    (match_dup 1)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*addsi3_zext_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (plus:SI
	    (match_operand:SI 1 "nonimmediate_operand" "%0")
	    (match_operand:SI 2 "x86_64_general_operand" "rme"))
	  (match_dup 1)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
  "add{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*add<mode>3_cconly_overflow_2"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (plus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "%0")
	    (match_operand:SWI 2 "<general_operand>" "<g>"))
	  (match_dup 2)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*add<mode>3_cc_overflow_2"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	    (plus:SWI
		(match_operand:SWI 1 "nonimmediate_operand" "%0,0")
		(match_operand:SWI 2 "<general_operand>" "<r><i>,m"))
	    (match_dup 2)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*addsi3_zext_cc_overflow_2"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (plus:SI
	    (match_operand:SI 1 "nonimmediate_operand" "%0")
	    (match_operand:SI 2 "x86_64_general_operand" "rme"))
	  (match_dup 2)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
  "add{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (plus:<DWI>
	    (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
	    (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))
	  (match_dup 1)))
   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
	(plus:<DWI> (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
  "#"
  "reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (plus:DWIH (match_dup 1) (match_dup 2))
		     (match_dup 1)))
	      (set (match_dup 0)
		   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCC FLAGS_REG)
		   (compare:CCC
		     (zero_extend:<DWI>
		       (plus:DWIH
			 (plus:DWIH
			   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
			   (match_dup 4))
			 (match_dup 5)))
		     (plus:<DWI>
		       (match_dup 6)
		       (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
	      (set (match_dup 3)
		   (plus:DWIH
		     (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
				(match_dup 4))
		     (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
      DONE;
    }
  if (CONST_INT_P (operands[5]))
    operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
					    operands[5], <MODE>mode);
  else
    operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
})

;; x == 0 with zero flag test can be done also as x < 1U with carry flag
;; test, where the latter is preferrable if we have some carry consuming
;; instruction.
;; For x != 0, we need to use x < 1U with negation of carry, i.e.
;; + (1 - CF).
(define_insn_and_split "*add<mode>3_eq"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (plus:SWI
	    (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
	    (match_operand:SWI 1 "nonimmediate_operand"))
	  (match_operand:SWI 2 "<general_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (plus:SWI
		     (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
			       (match_dup 1))
		     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_insn_and_split "*add<mode>3_ne"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (plus:SWI
	    (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
	    (match_operand:SWI 1 "nonimmediate_operand"))
	  (match_operand:SWI 2 "<immediate_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (<MODE>mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (minus:SWI
		     (minus:SWI (match_dup 1)
				(ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (~INTVAL (operands[2]),
			      <MODE>mode == DImode ? SImode : <MODE>mode);
})

(define_insn_and_split "*add<mode>3_eq_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
	  (match_operand:SWI 1 "<general_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
			     (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn_and_split "*add<mode>3_ne_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
	  (match_operand:SWI 1 "<general_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (minus:SWI (minus:SWI
				(match_dup 1)
				(ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
			      (const_int -1)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn_and_split "*sub<mode>3_eq"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(minus:SWI
	  (minus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand")
	    (eq:SWI (match_operand 3 "int_nonimmediate_operand")
		    (const_int 0)))
	  (match_operand:SWI 2 "<general_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (minus:SWI
		     (minus:SWI (match_dup 1)
				(ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_insn_and_split "*sub<mode>3_ne"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (minus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand")
	    (ne:SWI (match_operand 3 "int_nonimmediate_operand")
		    (const_int 0)))
	  (match_operand:SWI 2 "<immediate_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (<MODE>mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (plus:SWI
		     (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
			       (match_dup 1))
		     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
			      <MODE>mode == DImode ? SImode : <MODE>mode);
})

(define_insn_and_split "*sub<mode>3_eq_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(plus:SWI
	  (minus:SWI
	    (match_operand:SWI 1 "nonimmediate_operand")
	    (eq:SWI (match_operand 3 "int_nonimmediate_operand")
		    (const_int 0)))
	  (match_operand:SWI 2 "<immediate_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (<MODE>mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (minus:SWI
		     (minus:SWI (match_dup 1)
				(ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
		     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (-INTVAL (operands[2]),
			      <MODE>mode == DImode ? SImode : <MODE>mode);
})

(define_insn_and_split "*sub<mode>3_eq_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(minus:SWI
	  (match_operand:SWI 1 "<general_operand>")
	  (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (minus:SWI (match_dup 1)
			      (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn_and_split "*sub<mode>3_ne_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
	(minus:SWI
	  (match_operand:SWI 1 "<general_operand>")
	  (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
		   (plus:SWI (plus:SWI
			       (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
			       (match_dup 1))
			     (const_int -1)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; The patterns that match these are at the end of this file.

(define_expand "<plusminus_insn>xf3"
  [(set (match_operand:XF 0 "register_operand")
	(plusminus:XF
	  (match_operand:XF 1 "register_operand")
	  (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
	(plusminus:MODEF
	  (match_operand:MODEF 1 "register_operand")
	  (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")

;; Multiply instructions

(define_expand "mul<mode>3"
  [(parallel [(set (match_operand:SWIM248 0 "register_operand")
		   (mult:SWIM248
		     (match_operand:SWIM248 1 "register_operand")
		     (match_operand:SWIM248 2 "<general_operand>")))
	      (clobber (reg:CC FLAGS_REG))])])

(define_expand "mulqi3"
  [(parallel [(set (match_operand:QI 0 "register_operand")
		   (mult:QI
		     (match_operand:QI 1 "register_operand")
		     (match_operand:QI 2 "nonimmediate_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")

;; On AMDFAM10
;; IMUL reg32/64, reg32/64, imm8 	Direct
;; IMUL reg32/64, mem32/64, imm8 	VectorPath
;; IMUL reg32/64, reg32/64, imm32 	Direct
;; IMUL reg32/64, mem32/64, imm32 	VectorPath
;; IMUL reg32/64, reg32/64 		Direct
;; IMUL reg32/64, mem32/64 		Direct
;;
;; On BDVER1, all above IMULs use DirectPath
;;
;; On AMDFAM10
;; IMUL reg16, reg16, imm8 	VectorPath
;; IMUL reg16, mem16, imm8 	VectorPath
;; IMUL reg16, reg16, imm16 	VectorPath
;; IMUL reg16, mem16, imm16 	VectorPath
;; IMUL reg16, reg16 		Direct
;; IMUL reg16, mem16 		Direct
;;
;; On BDVER1, all HI MULs use DoublePath

(define_insn "*mul<mode>3_1"
  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
	(mult:SWIM248
	  (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
	  (match_operand:SWIM248 2 "<general_operand>" "K,<i>,mr")))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   imul{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,0,1")
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
	       (eq_attr "alternative" "1")
		  (const_string "vector")
	       (and (eq_attr "alternative" "2")
	       	    (ior (match_test "<MODE>mode == HImode")
		         (match_operand 1 "memory_operand")))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
	(cond [(and (eq_attr "alternative" "0,1")
	      	    (ior (match_test "<MODE>mode == HImode")
		         (match_operand 1 "memory_operand")))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "bdver1_decode")
   	(if_then_else
	  (match_test "<MODE>mode == HImode")
	    (const_string "double")
	    (const_string "direct")))
   (set_attr "mode" "<MODE>")])

(define_insn "*mulsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
	(zero_extend:DI
	  (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
		   (match_operand:SI 2 "x86_64_general_operand" "K,e,mr"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   imul{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,0,1")
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
	       (eq_attr "alternative" "1")
		  (const_string "vector")
	       (and (eq_attr "alternative" "2")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
	(cond [(and (eq_attr "alternative" "0,1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;;On AMDFAM10 and BDVER1
;; MUL reg8 	Direct
;; MUL mem8 	Direct

(define_insn "*mulqi3_1"
  [(set (match_operand:QI 0 "register_operand" "=a")
	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
		 (match_operand:QI 2 "nonimmediate_operand" "qm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

;; Multiply with jump on overflow.
(define_expand "mulv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO (mult:<DWI>
			      (sign_extend:<DWI>
				 (match_operand:SWI248 1 "register_operand"))
			      (match_dup 4))
			   (sign_extend:<DWI>
			      (mult:SWI248 (match_dup 1)
					   (match_operand:SWI248 2
					      "<general_operand>")))))
	      (set (match_operand:SWI248 0 "register_operand")
		   (mult:SWI248 (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
{
  if (CONST_INT_P (operands[2]))
    operands[4] = operands[2];
  else
    operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
})

(define_insn "*mulv<mode>4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (mult:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
		   (sign_extend:<DWI>
		      (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
		(sign_extend:<DWI>
		   (mult:SWI48 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI48 0 "register_operand" "=r,r")
	(mult:SWI48 (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   imul{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,1")
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
	       (eq_attr "alternative" "0")
		  (const_string "vector")
	       (and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
	(cond [(and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "<MODE>")])

(define_insn "*mulvhi4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (mult:SI
		   (sign_extend:SI
		      (match_operand:HI 1 "nonimmediate_operand" "%0"))
		   (sign_extend:SI
		      (match_operand:HI 2 "nonimmediate_operand" "mr")))
		(sign_extend:SI
		   (mult:HI (match_dup 1) (match_dup 2)))))
   (set (match_operand:HI 0 "register_operand" "=r")
	(mult:HI (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "imul{w}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "mode" "HI")])

(define_insn "*mulv<mode>4_1"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (mult:<DWI>
		   (sign_extend:<DWI>
		      (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
		   (match_operand:<DWI> 3 "const_int_operand" "K,i"))
		(sign_extend:<DWI>
		   (mult:SWI248 (match_dup 1)
				(match_operand:SWI248 2
				   "<immediate_operand>" "K,<i>")))))
   (set (match_operand:SWI248 0 "register_operand" "=r,r")
	(mult:SWI248 (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set (attr "prefix_0f")
   	(if_then_else
	  (match_test "<MODE>mode == HImode")
	    (const_string "0")
	    (const_string "*")))
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
	       (eq_attr "alternative" "1")
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
	(cond [(ior (match_test "<MODE>mode == HImode")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "bdver1_decode")
   	(if_then_else
	  (match_test "<MODE>mode == HImode")
	    (const_string "double")
	    (const_string "direct")))
   (set_attr "mode" "<MODE>")
   (set (attr "length_immediate")
	(cond [(eq_attr "alternative" "0")
		  (const_string "1")
	       (match_test "<MODE_SIZE> == 8")
		  (const_string "4")]
	      (const_string "<MODE_SIZE>")))])

(define_expand "umulv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO (mult:<DWI>
			      (zero_extend:<DWI>
				 (match_operand:SWI248 1
						      "nonimmediate_operand"))
			      (zero_extend:<DWI>
				 (match_operand:SWI248 2
						      "nonimmediate_operand")))
			   (zero_extend:<DWI>
			      (mult:SWI248 (match_dup 1) (match_dup 2)))))
	      (set (match_operand:SWI248 0 "register_operand")
		   (mult:SWI248 (match_dup 1) (match_dup 2)))
	      (clobber (match_scratch:SWI248 4))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  ""
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*umulv<mode>4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (mult:<DWI>
		   (zero_extend:<DWI>
		      (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
		   (zero_extend:<DWI>
		      (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
		(zero_extend:<DWI>
		   (mult:SWI248 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI248 0 "register_operand" "=a")
	(mult:SWI248 (match_dup 1) (match_dup 2)))
   (clobber (match_scratch:SWI248 3 "=d"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
       (const_string "vector")
       (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "<MODE>")])

(define_expand "<u>mulvqi4"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (eq:CCO (mult:HI
			      (any_extend:HI
				 (match_operand:QI 1 "nonimmediate_operand"))
			      (any_extend:HI
				 (match_operand:QI 2 "nonimmediate_operand")))
			   (any_extend:HI
			      (mult:QI (match_dup 1) (match_dup 2)))))
	      (set (match_operand:QI 0 "register_operand")
		   (mult:QI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 3))
	       (pc)))]
  "TARGET_QIMODE_MATH"
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (QImode, operands[1]);
})

(define_insn "*<u>mulvqi4"
  [(set (reg:CCO FLAGS_REG)
	(eq:CCO (mult:HI
		   (any_extend:HI
		      (match_operand:QI 1 "nonimmediate_operand" "%0"))
		   (any_extend:HI
		      (match_operand:QI 2 "nonimmediate_operand" "qm")))
		(any_extend:HI
		   (mult:QI (match_dup 1) (match_dup 2)))))
   (set (match_operand:QI 0 "register_operand" "=a")
	(mult:QI (match_dup 1) (match_dup 2)))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
	(const_string "vector")
	(const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

(define_expand "<u>mul<mode><dwi>3"
  [(parallel [(set (match_operand:<DWI> 0 "register_operand")
		   (mult:<DWI>
		     (any_extend:<DWI>
		       (match_operand:DWIH 1 "nonimmediate_operand"))
		     (any_extend:<DWI>
		       (match_operand:DWIH 2 "register_operand"))))
	      (clobber (reg:CC FLAGS_REG))])])

(define_expand "<u>mulqihi3"
  [(parallel [(set (match_operand:HI 0 "register_operand")
		   (mult:HI
		     (any_extend:HI
		       (match_operand:QI 1 "nonimmediate_operand"))
		     (any_extend:HI
		       (match_operand:QI 2 "register_operand"))))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")

(define_insn "*bmi2_umul<mode><dwi>3_1"
  [(set (match_operand:DWIH 0 "register_operand" "=r")
	(mult:DWIH
	  (match_operand:DWIH 2 "nonimmediate_operand" "%d")
	  (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
   (set (match_operand:DWIH 1 "register_operand" "=r")
	(truncate:DWIH
	  (lshiftrt:<DWI>
	    (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
			(zero_extend:<DWI> (match_dup 3)))
	    (match_operand:QI 4 "const_int_operand" "n"))))]
  "TARGET_BMI2 && INTVAL (operands[4]) == <MODE_SIZE> * BITS_PER_UNIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mulx\t{%3, %0, %1|%1, %0, %3}"
  [(set_attr "type" "imulx")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*umul<mode><dwi>3_1"
  [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
	(mult:<DWI>
	  (zero_extend:<DWI>
	    (match_operand:DWIH 1 "nonimmediate_operand" "%d,0"))
	  (zero_extend:<DWI>
	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   #
   mul{<imodesuffix>}\t%2"
  [(set_attr "isa" "bmi2,*")
   (set_attr "type" "imulx,imul")
   (set_attr "length_immediate" "*,0")
   (set (attr "athlon_decode")
	(cond [(eq_attr "alternative" "1")
		 (if_then_else (eq_attr "cpu" "athlon")
		   (const_string "vector")
		   (const_string "double"))]
	      (const_string "*")))
   (set_attr "amdfam10_decode" "*,double")
   (set_attr "bdver1_decode" "*,direct")
   (set_attr "prefix" "vex,orig")
   (set_attr "mode" "<MODE>")])

;; Convert mul to the mulx pattern to avoid flags dependency.
(define_split
 [(set (match_operand:<DWI> 0 "register_operand")
       (mult:<DWI>
	 (zero_extend:<DWI>
	   (match_operand:DWIH 1 "register_operand"))
	 (zero_extend:<DWI>
	   (match_operand:DWIH 2 "nonimmediate_operand"))))
  (clobber (reg:CC FLAGS_REG))]
 "TARGET_BMI2 && reload_completed
  && REGNO (operands[1]) == DX_REG"
  [(parallel [(set (match_dup 3)
		   (mult:DWIH (match_dup 1) (match_dup 2)))
	      (set (match_dup 4)
		   (truncate:DWIH
		     (lshiftrt:<DWI>
		       (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
				   (zero_extend:<DWI> (match_dup 2)))
		       (match_dup 5))))])]
{
  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);

  operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
})

(define_insn "*mul<mode><dwi>3_1"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(mult:<DWI>
	  (sign_extend:<DWI>
	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
	  (sign_extend:<DWI>
	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "imul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "<MODE>")])

(define_insn "*<u>mulqihi3_1"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(mult:HI
	  (any_extend:HI
	    (match_operand:QI 1 "nonimmediate_operand" "%0"))
	  (any_extend:HI
	    (match_operand:QI 2 "nonimmediate_operand" "qm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

(define_expand "<s>mul<mode>3_highpart"
  [(parallel [(set (match_operand:DWIH 0 "register_operand")
		   (truncate:DWIH
		     (lshiftrt:<DWI>
		       (mult:<DWI>
			 (any_extend:<DWI>
			   (match_operand:DWIH 1 "nonimmediate_operand"))
			 (any_extend:<DWI>
			   (match_operand:DWIH 2 "register_operand")))
		       (match_dup 3))))
	      (clobber (match_scratch:DWIH 4))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
  "operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")

(define_insn "*<s>muldi3_highpart_1"
  [(set (match_operand:DI 0 "register_operand" "=d")
	(truncate:DI
	  (lshiftrt:TI
	    (mult:TI
	      (any_extend:TI
		(match_operand:DI 1 "nonimmediate_operand" "%a"))
	      (any_extend:TI
		(match_operand:DI 2 "nonimmediate_operand" "rm")))
	    (const_int 64))))
   (clobber (match_scratch:DI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{q}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "DI")])

(define_insn "*<s>mulsi3_highpart_zext"
  [(set (match_operand:DI 0 "register_operand" "=d")
	(zero_extend:DI (truncate:SI
	  (lshiftrt:DI
	    (mult:DI (any_extend:DI
		       (match_operand:SI 1 "nonimmediate_operand" "%a"))
		     (any_extend:DI
		       (match_operand:SI 2 "nonimmediate_operand" "rm")))
	    (const_int 32)))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

(define_insn "*<s>mulsi3_highpart_1"
  [(set (match_operand:SI 0 "register_operand" "=d")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "nonimmediate_operand" "%a"))
	      (any_extend:DI
		(match_operand:SI 2 "nonimmediate_operand" "rm")))
	    (const_int 32))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;; The patterns that match these are at the end of this file.

(define_expand "mulxf3"
  [(set (match_operand:XF 0 "register_operand")
	(mult:XF (match_operand:XF 1 "register_operand")
		 (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

(define_expand "mul<mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
	(mult:MODEF (match_operand:MODEF 1 "register_operand")
		    (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")

;; Divide instructions

;; The patterns that match these are at the end of this file.

(define_expand "divxf3"
  [(set (match_operand:XF 0 "register_operand")
	(div:XF (match_operand:XF 1 "register_operand")
		(match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

(define_expand "div<mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
	(div:MODEF (match_operand:MODEF 1 "register_operand")
		   (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  if (<MODE>mode == SFmode
      && TARGET_SSE && TARGET_SSE_MATH
      && TARGET_RECIP_DIV
      && optimize_insn_for_speed_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1],
			 operands[2], SFmode);
      DONE;
    }
})

;; Divmod instructions.

(define_code_iterator any_div [div udiv])
(define_code_attr paired_mod [(div "mod") (udiv "umod")])

(define_expand "<u>divmod<mode>4"
  [(parallel [(set (match_operand:SWIM248 0 "register_operand")
		   (any_div:SWIM248
		     (match_operand:SWIM248 1 "register_operand")
		     (match_operand:SWIM248 2 "nonimmediate_operand")))
	      (set (match_operand:SWIM248 3 "register_operand")
		   (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])])

;; Split with 8bit unsigned divide:
;; 	if (dividend an divisor are in [0-255])
;;	   use 8bit unsigned integer divide
;;	 else
;;	   use original integer divide
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(any_div:SWI48 (match_operand:SWI48 2 "register_operand")
		       (match_operand:SWI48 3 "nonimmediate_operand")))
   (set (match_operand:SWI48 1 "register_operand")
	(<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")

(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (any_div:SI (match_operand:SI 2 "register_operand")
		      (match_operand:SI 3 "nonimmediate_operand"))))
   (set (match_operand:SI 1 "register_operand")
	(<paired_mod>:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")

(define_split
  [(set (match_operand:DI 1 "register_operand")
	(zero_extend:DI
	  (<paired_mod>:SI (match_operand:SI 2 "register_operand")
			   (match_operand:SI 3 "nonimmediate_operand"))))
   (set (match_operand:SI 0 "register_operand")
	(any_div:SI  (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")

(define_insn_and_split "divmod<mode>4_1"
  [(set (match_operand:SWI48 0 "register_operand" "=a")
	(div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
		   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWI48 1 "register_operand" "=&d")
	(mod:SWI48 (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 1)
		   (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
	           (div:SWI48 (match_dup 2) (match_dup 3)))
	      (set (match_dup 1)
		   (mod:SWI48 (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "udivmod<mode>4_1"
  [(set (match_operand:SWI48 0 "register_operand" "=a")
	(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
		    (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWI48 1 "register_operand" "=&d")
	(umod:SWI48 (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
		   (udiv:SWI48 (match_dup 2) (match_dup 3)))
	      (set (match_dup 1)
		   (umod:SWI48 (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "divmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(zero_extend:DI
	  (div:SI (match_operand:SI 2 "register_operand" "0")
		  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
	(mod:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 1)
		   (ashiftrt:SI (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 1)
		   (mod:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "udivmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(zero_extend:DI
	  (udiv:SI (match_operand:SI 2 "register_operand" "0")
		   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
	(umod:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 1)
		   (umod:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "divmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
	(zero_extend:DI
	  (mod:SI (match_operand:SI 2 "register_operand" "0")
		  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
	(div:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6)
		   (ashiftrt:SI (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
		   (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 0)
		   (div:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 6))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
  operands[6] = gen_lowpart (SImode, operands[1]);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[6], operands[2]);
      operands[4] = operands[6];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "udivmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
	(zero_extend:DI
	  (umod:SI (match_operand:SI 2 "register_operand" "0")
		 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
	(udiv:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 4) (const_int 0))
   (parallel [(set (match_dup 1)
		   (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 0)
		   (udiv:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 4))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_lowpart (SImode, operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*divmod<mode>4"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
	(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
		    (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=&d")
	(mod:SWIM248 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 1)
		   (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
	           (div:SWIM248 (match_dup 2) (match_dup 3)))
	      (set (match_dup 1)
		   (mod:SWIM248 (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);

  if (<MODE>mode != HImode
      && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "*udivmod<mode>4"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
	(udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
		      (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=&d")
	(umod:SWIM248 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
		   (udiv:SWIM248 (match_dup 2) (match_dup 3)))
	      (set (match_dup 1)
		   (umod:SWIM248 (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

;; Optimize division or modulo by constant power of 2, if the constant
;; materializes only after expansion.
(define_insn_and_split "*udivmod<mode>4_pow2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
		    (match_operand:SWI48 3 "const_int_operand" "n")))
   (set (match_operand:SWI48 1 "register_operand" "=r")
	(umod:SWI48 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "*divmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(zero_extend:DI
	  (div:SI (match_operand:SI 2 "register_operand" "0")
		  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
	(mod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 1)
		   (ashiftrt:SI (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 1)
		   (mod:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(zero_extend:DI
	  (udiv:SI (match_operand:SI 2 "register_operand" "0")
		   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
	(umod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 1)
		   (umod:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_pow2_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (udiv:SI (match_operand:SI 2 "register_operand" "0")
		   (match_operand:SI 3 "const_int_operand" "n"))))
   (set (match_operand:SI 1 "register_operand" "=r")
	(umod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && exact_log2 (UINTVAL (operands[3])) > 0"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*divmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
	(zero_extend:DI
	  (mod:SI (match_operand:SI 2 "register_operand" "0")
		  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
	(div:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6)
		   (ashiftrt:SI (match_dup 4) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
		   (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 0)
		   (div:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 6))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
  operands[6] = gen_lowpart (SImode, operands[1]);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[6], operands[2]);
      operands[4] = operands[6];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
	(zero_extend:DI
	  (umod:SI (match_operand:SI 2 "register_operand" "0")
		   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
	(udiv:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 4) (const_int 0))
   (parallel [(set (match_dup 1)
		   (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
	      (set (match_dup 0)
		   (udiv:SI (match_dup 2) (match_dup 3)))
	      (use (match_dup 4))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_lowpart (SImode, operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_pow2_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=r")
	(zero_extend:DI
	  (umod:SI (match_operand:SI 2 "register_operand" "0")
		   (match_operand:SI 3 "const_int_operand" "n"))))
   (set (match_operand:SI 0 "register_operand" "=r")
	(umod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && exact_log2 (UINTVAL (operands[3])) > 0"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
		   (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn "*<u>divmod<mode>4_noext"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
	(any_div:SWIM248
	  (match_operand:SWIM248 2 "register_operand" "0")
	  (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=d")
	(<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
   (use (match_operand:SWIM248 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<sgnprefix>div{<imodesuffix>}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "<MODE>")])

(define_insn "*<u>divmodsi4_noext_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(zero_extend:DI
	  (any_div:SI (match_operand:SI 2 "register_operand" "0")
		      (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=d")
	(<paired_mod>:SI (match_dup 2) (match_dup 3)))
   (use (match_operand:SI 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>div{l}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "SI")])

(define_insn "*<u>divmodsi4_noext_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=d")
	(zero_extend:DI
	  (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
			   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
	(any_div:SI (match_dup 2) (match_dup 3)))
   (use (match_operand:SI 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>div{l}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "SI")])

(define_expand "divmodqi4"
  [(parallel [(set (match_operand:QI 0 "register_operand")
		   (div:QI
		     (match_operand:QI 1 "register_operand")
		     (match_operand:QI 2 "nonimmediate_operand")))
	      (set (match_operand:QI 3 "register_operand")
		   (mod:QI (match_dup 1) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH"
{
  rtx div, mod;
  rtx tmp0, tmp1;

  tmp0 = gen_reg_rtx (HImode);
  tmp1 = gen_reg_rtx (HImode);

  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
  tmp1 = lowpart_subreg (QImode, tmp1, SImode);
  rtx_insn *insn = emit_move_insn (operands[3], tmp1);

  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Extract quotient from AL.  */
  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));

  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, div);

  DONE;
})

(define_expand "udivmodqi4"
  [(parallel [(set (match_operand:QI 0 "register_operand")
		   (udiv:QI
		     (match_operand:QI 1 "register_operand")
		     (match_operand:QI 2 "nonimmediate_operand")))
	      (set (match_operand:QI 3 "register_operand")
		   (umod:QI (match_dup 1) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH"
{
  rtx div, mod;
  rtx tmp0, tmp1;

  tmp0 = gen_reg_rtx (HImode);
  tmp1 = gen_reg_rtx (HImode);

  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
  emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
  tmp1 = lowpart_subreg (QImode, tmp1, SImode);
  rtx_insn *insn = emit_move_insn (operands[3], tmp1);

  mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Extract quotient from AL.  */
  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));

  div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, div);

  DONE;
})

;; Divide AX by r/m8, with result stored in
;; AL <- Quotient
;; AH <- Remainder
;; Change div/mod to HImode and extend the second argument to HImode
;; so that mode of div/mod matches with mode of arguments.  Otherwise
;; combine may fail.
(define_insn "<u>divmodhiqi3"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(ior:HI
	  (ashift:HI
	    (zero_extend:HI
	      (truncate:QI
		(mod:HI (match_operand:HI 1 "register_operand" "0")
			(any_extend:HI
			  (match_operand:QI 2 "nonimmediate_operand" "qm")))))
	    (const_int 8))
	  (zero_extend:HI
	    (truncate:QI
	      (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH"
  "<sgnprefix>div{b}\t%2"
  [(set_attr "type" "idiv")
   (set_attr "mode" "QI")])

;; We cannot use div/idiv for double division, because it causes
;; "division by zero" on the overflow and that's not what we expect
;; from truncate.  Because true (non truncating) double division is
;; never generated, we can't create this insn anyway.
;
;(define_insn ""
;  [(set (match_operand:SI 0 "register_operand" "=a")
;	(truncate:SI
;	  (udiv:DI (match_operand:DI 1 "register_operand" "A")
;		   (zero_extend:DI
;		     (match_operand:SI 2 "nonimmediate_operand" "rm")))))
;   (set (match_operand:SI 3 "register_operand" "=d")
;	(truncate:SI
;	  (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
;   (clobber (reg:CC FLAGS_REG))]
;  ""
;  "div{l}\t{%2, %0|%0, %2}"
;  [(set_attr "type" "idiv")])

;;- Logical AND instructions

;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
;; Note that this excludes ah.

(define_expand "@test<mode>_ccno_1"
  [(set (reg:CCNO FLAGS_REG)
	(compare:CCNO
	  (and:SWI48
	    (match_operand:SWI48 0 "nonimmediate_operand")
	    (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
	  (const_int 0)))])

(define_expand "testqi_ccz_1"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (and:QI
	    (match_operand:QI 0 "nonimmediate_operand")
	    (match_operand:QI 1 "nonmemory_operand"))
	  (const_int 0)))])

(define_insn "*testdi_1"
  [(set (reg FLAGS_REG)
	(compare
	  (and:DI
	    (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
	    (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
	 (const_int 0)))]
  "TARGET_64BIT
   && ix86_match_ccmode
	(insn,
	 /* If we are going to emit testl instead of testq, and the operands[1]
	    constant might have the SImode sign bit set, make sure the sign
	    flag isn't tested, because the instruction will set the sign flag
	    based on bit 31 rather than bit 63.  If it isn't CONST_INT,
	    conservatively assume it might have bit 31 set.  */
	 (satisfies_constraint_Z (operands[1])
	  && (!CONST_INT_P (operands[1])
	      || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
	 ? CCZmode : CCNOmode)"
  "@
   test{l}\t{%k1, %k0|%k0, %k1}
   test{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test")
   (set_attr "mode" "SI,DI")])

(define_insn "*testqi_1_maybe_si"
  [(set (reg FLAGS_REG)
	(compare
	  (and:QI
	    (match_operand:QI 0 "nonimmediate_operand" "%qm,*a,qm,r")
	    (match_operand:QI 1 "nonmemory_operand" "q,n,n,n"))
	  (const_int 0)))]
  "ix86_match_ccmode (insn,
		      CONST_INT_P (operands[1])
		      && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
{
  if (which_alternative == 3)
    {
      if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
	operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
      return "test{l}\t{%1, %k0|%k0, %1}";
    }
  return "test{b}\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "test")
   (set_attr "mode" "QI,QI,QI,SI")
   (set_attr "pent_pair" "uv,uv,np,np")])

(define_insn "*test<mode>_1"
  [(set (reg FLAGS_REG)
	(compare
	  (and:SWI124
	    (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
	    (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
	 (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test")
   (set_attr "mode" "<MODE>")
   (set_attr "pent_pair" "uv,uv,np")])

(define_expand "testqi_ext_1_ccno"
  [(set (reg:CCNO FLAGS_REG)
	(compare:CCNO
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 0 "ext_register_operand")
			       (const_int 8)
			       (const_int 8)) 0)
	      (match_operand 1 "const_int_operand"))
	  (const_int 0)))])

(define_insn "*testqi_ext_1"
  [(set (reg FLAGS_REG)
	(compare
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 0 "ext_register_operand" "Q,Q")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 1 "general_operand" "QnBc,m"))
	  (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "test")
   (set_attr "mode" "QI")])

(define_insn "*testqi_ext_2"
  [(set (reg FLAGS_REG)
	(compare
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 0 "ext_register_operand" "Q")
			       (const_int 8)
			       (const_int 8)) 0)
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
			       (const_int 8)
			       (const_int 8)) 0))
	  (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "test")
   (set_attr "mode" "QI")])

;; Combine likes to form bit extractions for some tests.  Humor it.
(define_insn_and_split "*testqi_ext_3"
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
	  [(zero_extract:SWI248
	     (match_operand 2 "nonimmediate_operand" "rm")
	     (match_operand 3 "const_int_operand" "n")
	     (match_operand 4 "const_int_operand" "n"))
	   (const_int 0)]))]
  "ix86_match_ccmode (insn, CCNOmode)
   && ((TARGET_64BIT && GET_MODE (operands[2]) == DImode)
       || GET_MODE (operands[2]) == SImode
       || GET_MODE (operands[2]) == HImode
       || GET_MODE (operands[2]) == QImode)
   /* Ensure that resulting mask is zero or sign extended operand.  */
   && INTVAL (operands[4]) >= 0
   && ((INTVAL (operands[3]) > 0
	&& INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
       || (<MODE>mode == DImode
	   && INTVAL (operands[3]) > 32
	   && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
{
  rtx val = operands[2];
  HOST_WIDE_INT len = INTVAL (operands[3]);
  HOST_WIDE_INT pos = INTVAL (operands[4]);
  machine_mode mode = GET_MODE (val);

  if (SUBREG_P (val))
    {
      machine_mode submode = GET_MODE (SUBREG_REG (val));

      /* Narrow paradoxical subregs to prevent partial register stalls.  */
      if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
	  && GET_MODE_CLASS (submode) == MODE_INT)
	{
	  val = SUBREG_REG (val);
	  mode = submode;
	}
    }

  /* Small HImode tests can be converted to QImode.  */
  if (register_operand (val, HImode) && pos + len <= 8)
    {
      val = gen_lowpart (QImode, val);
      mode = QImode;
    }

  gcc_assert (pos + len <= GET_MODE_PRECISION (mode));

  wide_int mask
    = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));

  operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
})

;; Convert HImode/SImode test instructions with immediate to QImode ones.
;; i386 does not allow to encode test with 8bit sign extended immediate, so
;; this is relatively important trick.
;; Do the conversion only post-reload to avoid limiting of the register class
;; to QI regs.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and (match_operand 2 "QIreg_operand")
	        (match_operand 3 "const_int_operand"))
	   (const_int 0)]))]
   "reload_completed
    && GET_MODE (operands[2]) != QImode
    && ((ix86_match_ccmode (insn, CCZmode)
    	 && !(INTVAL (operands[3]) & ~(255 << 8)))
	|| (ix86_match_ccmode (insn, CCNOmode)
	    && !(INTVAL (operands[3]) & ~(127 << 8))))"
  [(set (match_dup 0)
	(match_op_dup 1
	  [(and:QI
	     (subreg:QI
	       (zero_extract:SI (match_dup 2)
				(const_int 8)
				(const_int 8)) 0)
	     (match_dup 3))
	   (const_int 0)]))]
{
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
})

(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and (match_operand 2 "nonimmediate_operand")
	        (match_operand 3 "const_int_operand"))
	   (const_int 0)]))]
   "reload_completed
    && GET_MODE (operands[2]) != QImode
    && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
    && ((ix86_match_ccmode (insn, CCZmode)
	 && !(INTVAL (operands[3]) & ~255))
	|| (ix86_match_ccmode (insn, CCNOmode)
	    && !(INTVAL (operands[3]) & ~127)))"
  [(set (match_dup 0)
	(match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
			 (const_int 0)]))]
{
  operands[2] = gen_lowpart (QImode, operands[2]);
  operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
})

;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers.  If this is considered useful,
;; it should be done with splitters.

(define_expand "and<mode>3"
  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
	(and:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")
		       (match_operand:SWIM1248s 2 "<general_szext_operand>")))]
  ""
{
  machine_mode mode = <MODE>mode;

  if (<MODE>mode == DImode && !TARGET_64BIT)
    ;
  else if (const_int_operand (operands[2], <MODE>mode)
	   && register_operand (operands[0], <MODE>mode)
	   && !(TARGET_ZERO_EXTEND_WITH_AND
		&& optimize_function_for_speed_p (cfun)))
    {
      unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);

      if (ival == GET_MODE_MASK (SImode))
	mode = SImode;
      else if (ival == GET_MODE_MASK (HImode))
	mode = HImode;
      else if (ival == GET_MODE_MASK (QImode))
	mode = QImode;
    }

  if (mode != <MODE>mode)
    emit_insn (gen_extend_insn
	       (operands[0], gen_lowpart (mode, operands[1]),
		<MODE>mode, mode, 1));
  else
    ix86_expand_binary_operator (AND, <MODE>mode, operands);

  DONE;
})

(define_insn_and_split "*anddi3_doubleword"
  [(set (match_operand:DI 0 "nonimmediate_operand")
	(and:DI
	 (match_operand:DI 1 "nonimmediate_operand")
	 (match_operand:DI 2 "x86_64_szext_general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
   && ix86_binary_operator_ok (AND, DImode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);

  if (operands[2] == const0_rtx)
    emit_move_insn (operands[0], const0_rtx);
  else if (operands[2] == constm1_rtx)
    emit_move_insn (operands[0], operands[1]);
  else
    emit_insn (gen_andsi3 (operands[0], operands[1], operands[2]));

  if (operands[5] == const0_rtx)
    emit_move_insn (operands[3], const0_rtx);
  else if (operands[5] == constm1_rtx)
    emit_move_insn (operands[3], operands[4]);
  else
    emit_insn (gen_andsi3 (operands[3], operands[4], operands[5]));

  DONE;
})

(define_insn "*anddi_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
	(and:DI
	 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
	 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
  "@
   and{l}\t{%k2, %k0|%k0, %k2}
   and{q}\t{%2, %0|%0, %2}
   and{q}\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "alu,alu,alu,imovx")
   (set_attr "length_immediate" "*,*,*,0")
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "type" "imovx")
	    (and (match_test "INTVAL (operands[2]) == 0xff")
		 (match_operand 1 "ext_QIreg_operand")))
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" "SI,DI,DI,SI")])

(define_insn_and_split "*anddi_1_btr"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
	(and:DI
	 (match_operand:DI 1 "nonimmediate_operand" "%0")
	 (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT
   && ix86_binary_operator_ok (AND, DImode, operands)
   && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel [(set (zero_extract:DI (match_dup 0)
				    (const_int 1)
				    (match_dup 3))
		   (const_int 0))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Turn *anddi_1 into *andsi_1_zext if possible.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
		(match_operand:DI 2 "x86_64_zext_immediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  [(parallel [(set (match_dup 0)
		   (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (GET_CODE (operands[2]) == SYMBOL_REF
      || GET_CODE (operands[2]) == LABEL_REF)
    {
      operands[2] = shallow_copy_rtx (operands[2]);
      PUT_MODE (operands[2], SImode);
    }
  else if (GET_CODE (operands[2]) == CONST)
    {
      /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
      operands[2] = copy_rtx (operands[2]);
      PUT_MODE (operands[2], SImode);
      PUT_MODE (XEXP (operands[2], 0), SImode);
      PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
    }    
  else
    operands[2] = gen_lowpart (SImode, operands[2]);
})

;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
		  (match_operand:SI 2 "x86_64_general_operand" "rme"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
  "and{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*and<mode>_1"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya")
	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm")
		   (match_operand:SWI24 2 "<general_operand>" "r<i>,m,L")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "@
   and{<imodesuffix>}\t{%2, %0|%0, %2}
   and{<imodesuffix>}\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "alu,alu,imovx")
   (set_attr "length_immediate" "*,*,0")
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "type" "imovx")
	    (and (match_test "INTVAL (operands[2]) == 0xff")
		 (match_operand 1 "ext_QIreg_operand")))
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" "<MODE>,<MODE>,SI")])

(define_insn "*andqi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
		(match_operand:QI 2 "general_operand" "qn,m,rn")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (AND, QImode, operands)"
  "@
   and{b}\t{%2, %0|%0, %2}
   and{b}\t{%2, %0|%0, %2}
   and{l}\t{%k2, %k0|%k0, %k2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI,QI,SI")
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*and<mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0")
		   (match_operand:SWI12 2 "general_operand" "<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && (rtx_equal_p (operands[0], operands[1])
       || rtx_equal_p (operands[0], operands[2]))"
  "and{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_split
  [(set (match_operand:SWI248 0 "register_operand")
	(and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
		    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (!REG_P (operands[1])
       || REGNO (operands[0]) != REGNO (operands[1]))"
  [(const_int 0)]
{
  unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
  machine_mode mode;

  if (ival == GET_MODE_MASK (SImode))
    mode = SImode;
  else if (ival == GET_MODE_MASK (HImode))
    mode = HImode;
  else if (ival == GET_MODE_MASK (QImode))
    mode = QImode;
  else
    gcc_unreachable ();

  /* Zero extend to SImode to avoid partial register stalls.  */
  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
    operands[0] = gen_lowpart (SImode, operands[0]);

  emit_insn (gen_extend_insn
	     (operands[0], gen_lowpart (mode, operands[1]),
	      GET_MODE (operands[0]), mode, 1));
  DONE;
})

(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(and:SWI48 (match_dup 0)
		   (const_int -65536)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
    || optimize_function_for_size_p (cfun)"
  [(set (strict_low_part (match_dup 1)) (const_int 0))]
  "operands[1] = gen_lowpart (HImode, operands[0]);")

(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
	(and:SWI248 (match_dup 0)
		    (const_int -256)))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && reload_completed"
  [(set (strict_low_part (match_dup 1)) (const_int 0))]
  "operands[1] = gen_lowpart (QImode, operands[0]);")

(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
	(and:SWI248 (match_dup 0)
		    (const_int -65281)))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && reload_completed"
  [(parallel
     [(set (zero_extract:SI (match_dup 0)
			    (const_int 8)
			    (const_int 8))
	   (subreg:SI
	     (xor:QI
	       (subreg:QI
		 (zero_extract:SI (match_dup 0)
				  (const_int 8)
				  (const_int 8)) 0)
	       (subreg:QI
		 (zero_extract:SI (match_dup 0)
				  (const_int 8)
				  (const_int 8)) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[0] = gen_lowpart (SImode, operands[0]);")

(define_insn "*anddi_2"
  [(set (reg FLAGS_REG)
	(compare
	 (and:DI
	  (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
	  (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
	 (const_int 0)))
   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
	(and:DI (match_dup 1) (match_dup 2)))]
  "TARGET_64BIT
   && ix86_match_ccmode
	(insn,
	 /* If we are going to emit andl instead of andq, and the operands[2]
	    constant might have the SImode sign bit set, make sure the sign
	    flag isn't tested, because the instruction will set the sign flag
	    based on bit 31 rather than bit 63.  If it isn't CONST_INT,
	    conservatively assume it might have bit 31 set.  */
	 (satisfies_constraint_Z (operands[2])
	  && (!CONST_INT_P (operands[2])
	      || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
	 ? CCZmode : CCNOmode)
   && ix86_binary_operator_ok (AND, DImode, operands)"
  "@
   and{l}\t{%k2, %k0|%k0, %k2}
   and{q}\t{%2, %0|%0, %2}
   and{q}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI,DI,DI")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_2_zext"
  [(set (reg FLAGS_REG)
	(compare (and:SI
		  (match_operand:SI 1 "nonimmediate_operand" "%0")
		  (match_operand:SI 2 "x86_64_general_operand" "rme"))
		 (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (AND, SImode, operands)"
  "and{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*andqi_2_maybe_si"
  [(set (reg FLAGS_REG)
	(compare (and:QI
		  (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
		  (match_operand:QI 2 "general_operand" "qn,m,n"))
		 (const_int 0)))
   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
	(and:QI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (AND, QImode, operands)
   && ix86_match_ccmode (insn,
			 CONST_INT_P (operands[2])
			 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
{
  if (which_alternative == 2)
    {
      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
        operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
      return "and{l}\t{%2, %k0|%k0, %2}";
    }
  return "and{b}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "alu")
   (set_attr "mode" "QI,QI,SI")
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*and<mode>_2"
  [(set (reg FLAGS_REG)
	(compare (and:SWI124
		  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
		  (match_operand:SWI124 2 "<general_operand>" "<r><i>,m"))
		 (const_int 0)))
   (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
	(and:SWI124 (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "and{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "andqi_ext_1"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   rtx_equal_p (operands[0], operands[1])"
  "and{b}\t{%2, %h0|%h0, %2}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Generated by peephole translating test to and.  This shows up
;; often in fp comparisons.
(define_insn "*andqi_ext_1_cc"
  [(set (reg FLAGS_REG)
	(compare
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 2 "general_operand" "QnBc,m"))
	  (const_int 0)))
   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_dup 1)
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_dup 2)) 0))]
  "ix86_match_ccmode (insn, CCNOmode)
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
  "and{b}\t{%2, %h0|%h0, %2}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

(define_insn "*andqi_ext_2"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (and:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "%0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (subreg:QI
	      (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
			       (const_int 8)
			       (const_int 8)) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   rtx_equal_p (operands[0], operands[1])
   || rtx_equal_p (operands[0], operands[2])"
  "and{b}\t{%h2, %h0|%h0, %h2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Convert wide AND instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls.  We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
	(and:SWI248 (match_operand:SWI248 1 "register_operand")
		    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(~INTVAL (operands[2]) & ~(255 << 8))"
  [(parallel
     [(set (zero_extract:SI (match_dup 0)
			    (const_int 8)
			    (const_int 8))
	   (subreg:SI
	     (and:QI
	       (subreg:QI
		 (zero_extract:SI (match_dup 1)
				  (const_int 8)
				  (const_int 8)) 0)
	       (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})

;; Since AND can be encoded with sign extended immediate, this is only
;; profitable when 7th bit is not set.
(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
	(and:SWI248 (match_operand:SWI248 1 "general_operand")
		    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(~INTVAL (operands[2]) & ~255)
    && !(INTVAL (operands[2]) & 128)"
  [(parallel [(set (strict_low_part (match_dup 0))
		   (and:QI (match_dup 1)
			   (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (QImode, operands[0]);
  operands[1] = gen_lowpart (QImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})

(define_insn "*andndi3_doubleword"
  [(set (match_operand:DI 0 "register_operand")
	(and:DI
	  (not:DI (match_operand:DI 1 "register_operand"))
	  (match_operand:DI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
   && ix86_pre_reload_split ()"
  "#")

(define_split
  [(set (match_operand:DI 0 "register_operand")
	(and:DI
	  (not:DI (match_operand:DI 1 "register_operand"))
	  (match_operand:DI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_BMI && TARGET_STV && TARGET_SSE2
   && can_create_pseudo_p ()"
  [(parallel [(set (match_dup 0)
		   (and:SI (not:SI (match_dup 1)) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 3)
		   (and:SI (not:SI (match_dup 4)) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")

(define_split
  [(set (match_operand:DI 0 "register_operand")
	(and:DI
	  (not:DI (match_operand:DI 1 "register_operand"))
	  (match_operand:DI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && !TARGET_BMI && TARGET_STV && TARGET_SSE2
   && can_create_pseudo_p ()"
  [(set (match_dup 6) (not:SI (match_dup 1)))
   (parallel [(set (match_dup 0)
		   (and:SI (match_dup 6) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 7) (not:SI (match_dup 4)))
   (parallel [(set (match_dup 3)
		   (and:SI (match_dup 7) (match_dup 5)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[6] = gen_reg_rtx (SImode);
  operands[7] = gen_reg_rtx (SImode);

  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
})

(define_insn "*andn<mode>_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(and:SWI48
	  (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
	  (match_operand:SWI48 2 "nonimmediate_operand" "r,m")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "andn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

(define_insn "*andn<mode>_1"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
	(and:SWI12
	  (not:SWI12 (match_operand:SWI12 1 "register_operand" "r"))
	  (match_operand:SWI12 2 "register_operand" "r")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct")
   (set_attr "mode" "SI")])

(define_insn "*andn_<mode>_ccno"
  [(set (reg FLAGS_REG)
	(compare
	  (and:SWI48
	    (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
	    (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
	  (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r,r"))]
  "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
  "andn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

;; Logical inclusive and exclusive OR instructions

;; %%% This used to optimize known byte-wide and operations to memory.
;; If this is considered useful, it should be done with splitters.

(define_expand "<code><mode>3"
  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
	(any_or:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")
			  (match_operand:SWIM1248s 2 "<general_operand>")))]
  ""
  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")

(define_insn_and_split "*<code>di3_doubleword"
  [(set (match_operand:DI 0 "nonimmediate_operand")
	(any_or:DI
	 (match_operand:DI 1 "nonimmediate_operand")
	 (match_operand:DI 2 "x86_64_szext_general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, DImode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);

  if (operands[2] == const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  else if (operands[2] == constm1_rtx)
    {
      if (<CODE> == IOR)
	emit_move_insn (operands[0], constm1_rtx);
      else
	ix86_expand_unary_operator (NOT, SImode, &operands[0]);
    }
  else
    ix86_expand_binary_operator (<CODE>, SImode, &operands[0]);

  if (operands[5] == const0_rtx)
    emit_move_insn (operands[3], operands[4]);
  else if (operands[5] == constm1_rtx)
    {
      if (<CODE> == IOR)
	emit_move_insn (operands[3], constm1_rtx);
      else
	ix86_expand_unary_operator (NOT, SImode, &operands[3]);
    }
  else
    ix86_expand_binary_operator (<CODE>, SImode, &operands[3]);

  DONE;
})

(define_insn "*<code><mode>_1"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r")
	(any_or:SWI248
	 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
	 (match_operand:SWI248 2 "<general_operand>" "r<i>,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "*iordi_1_bts"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
	(ior:DI
	 (match_operand:DI 1 "nonimmediate_operand" "%0")
	 (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT
   && ix86_binary_operator_ok (IOR, DImode, operands)
   && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel [(set (zero_extract:DI (match_dup 0)
				    (const_int 1)
				    (match_dup 3))
		   (const_int 1))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

(define_insn_and_split "*xordi_1_btc"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
	(xor:DI
	 (match_operand:DI 1 "nonimmediate_operand" "%0")
	 (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT
   && ix86_binary_operator_ok (XOR, DImode, operands)
   && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel [(set (zero_extract:DI (match_dup 0)
				    (const_int 1)
				    (match_dup 3))
		   (not:DI (zero_extract:DI (match_dup 0)
					    (const_int 1)
					    (match_dup 3))))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
		    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
  "<logic>{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*<code>si_1_zext_imm"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(any_or:DI
	 (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
	 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
  "<logic>{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*<code>qi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
	(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
		   (match_operand:QI 2 "general_operand" "qn,m,rn")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, QImode, operands)"
  "@
   <logic>{b}\t{%2, %0|%0, %2}
   <logic>{b}\t{%2, %0|%0, %2}
   <logic>{l}\t{%k2, %k0|%k0, %k2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI,QI,SI")
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*<code><mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0")
		      (match_operand:SWI12 2 "general_operand" "<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && (rtx_equal_p (operands[0], operands[1])
       || rtx_equal_p (operands[0], operands[2]))"
  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*<code><mode>_2"
  [(set (reg FLAGS_REG)
	(compare (any_or:SWI
		  (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
		  (match_operand:SWI 2 "<general_operand>" "<r><i>,m"))
		 (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
	(any_or:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
  [(set (reg FLAGS_REG)
	(compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
			    (match_operand:SI 2 "x86_64_general_operand" "rme"))
		 (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
  "<logic>{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*<code>si_2_zext_imm"
  [(set (reg FLAGS_REG)
	(compare (any_or:SI
		  (match_operand:SI 1 "nonimmediate_operand" "%0")
		  (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
		 (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
  "<logic>{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

(define_insn "*<code><mode>_3"
  [(set (reg FLAGS_REG)
	(compare (any_or:SWI
		  (match_operand:SWI 1 "nonimmediate_operand" "%0")
		  (match_operand:SWI 2 "<general_operand>" "<g>"))
		 (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "ix86_match_ccmode (insn, CCNOmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "*<code>qi_ext_1"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (any_or:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
  "<logic>{b}\t{%2, %h0|%h0, %2}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

(define_insn "*<code>qi_ext_2"
  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (any_or:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "%0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (subreg:QI
	      (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
			       (const_int 8)
			       (const_int 8)) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && (rtx_equal_p (operands[0], operands[1])
       || rtx_equal_p (operands[0], operands[2]))"
  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Convert wide OR instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls.  We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
	(any_or:SWI248 (match_operand:SWI248 1 "register_operand")
		       (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(INTVAL (operands[2]) & ~(255 << 8))"
  [(parallel
     [(set (zero_extract:SI (match_dup 0)
			    (const_int 8)
			    (const_int 8))
	   (subreg:SI
	     (any_or:QI
	       (subreg:QI
		 (zero_extract:SI (match_dup 1)
				  (const_int 8)
				  (const_int 8)) 0)
	       (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})

;; Since OR can be encoded with sign extended immediate, this is only
;; profitable when 7th bit is set.
(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
	(any_or:SWI248 (match_operand:SWI248 1 "general_operand")
		       (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(INTVAL (operands[2]) & ~255)
    && (INTVAL (operands[2]) & 128)"
  [(parallel [(set (strict_low_part (match_dup 0))
		   (any_or:QI (match_dup 1)
			      (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (QImode, operands[0]);
  operands[1] = gen_lowpart (QImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})

(define_expand "xorqi_ext_1_cc"
  [(parallel [
     (set (reg:CCNO FLAGS_REG)
	  (compare:CCNO
	    (xor:QI
	      (subreg:QI
		(zero_extract:SI (match_operand 1 "ext_register_operand")
				 (const_int 8)
				 (const_int 8)) 0)
	      (match_operand 2 "const_int_operand"))
	    (const_int 0)))
     (set (zero_extract:SI (match_operand 0 "ext_register_operand")
			   (const_int 8)
			   (const_int 8))
	  (subreg:SI
	    (xor:QI
	      (subreg:QI
		(zero_extract:SI (match_dup 1)
				 (const_int 8)
				 (const_int 8)) 0)
	    (match_dup 2)) 0))])])

(define_insn "*xorqi_ext_1_cc"
  [(set (reg FLAGS_REG)
	(compare
	  (xor:QI
	    (subreg:QI
	      (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0")
			       (const_int 8)
			       (const_int 8)) 0)
	    (match_operand:QI 2 "general_operand" "QnBc,m"))
	  (const_int 0)))
   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
			 (const_int 8)
			 (const_int 8))
	(subreg:SI
	  (xor:QI
	    (subreg:QI
	      (zero_extract:SI (match_dup 1)
			       (const_int 8)
			       (const_int 8)) 0)
	  (match_dup 2)) 0))]
  "ix86_match_ccmode (insn, CCNOmode)
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
  "xor{b}\t{%2, %h0|%h0, %2}"
  [(set_attr "isa" "*,nox64")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Negation instructions

(define_expand "neg<mode>2"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
	(neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
  ""
  "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")

(define_insn_and_split "*neg<dwi>2_doubleword"
  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
	(neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
  "#"
  "reload_completed"
  [(parallel
    [(set (reg:CCZ FLAGS_REG)
	  (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0)))
     (set (match_dup 0) (neg:DWIH (match_dup 1)))])
   (parallel
    [(set (match_dup 2)
	  (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
				(match_dup 3))
		     (const_int 0)))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 2)
	  (neg:DWIH (match_dup 2)))
     (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")

(define_insn "*neg<mode>2_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
  "neg{<imodesuffix>}\t%0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "<MODE>")])

(define_insn "*negsi2_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (neg:SI (match_operand:SI 1 "register_operand" "0"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
  "neg{l}\t%k0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "SI")])

;; The problem with neg is that it does not perform (compare x 0),
;; it really performs (compare 0 x), which leaves us with the zero
;; flag being the only useful item.

(define_insn "*neg<mode>2_cmpz"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
		   (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(neg:SWI (match_dup 1)))]
  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
  "neg{<imodesuffix>}\t%0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "<MODE>")])

(define_insn "*negsi2_cmpz_zext"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (neg:SI (match_operand:SI 1 "register_operand" "0"))
	  (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (neg:SI (match_dup 1))))]
  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
  "neg{l}\t%k0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "SI")])

;; Negate with jump on overflow.
(define_expand "negv<mode>3"
  [(parallel [(set (reg:CCO FLAGS_REG)
		   (ne:CCO (match_operand:SWI 1 "register_operand")
			   (match_dup 3)))
	      (set (match_operand:SWI 0 "register_operand")
		   (neg:SWI (match_dup 1)))])
   (set (pc) (if_then_else
	       (eq (reg:CCO FLAGS_REG) (const_int 0))
	       (label_ref (match_operand 2))
	       (pc)))]
  ""
{
  operands[3]
    = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
		    <MODE>mode);
})

(define_insn "*negv<mode>3"
  [(set (reg:CCO FLAGS_REG)
	(ne:CCO (match_operand:SWI 1 "nonimmediate_operand" "0")
		(match_operand:SWI 2 "const_int_operand")))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(neg:SWI (match_dup 1)))]
  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
   && mode_signbit_p (<MODE>mode, operands[2])"
  "neg{<imodesuffix>}\t%0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "<MODE>")])

(define_expand "<code>tf2"
  [(set (match_operand:TF 0 "register_operand")
	(absneg:TF (match_operand:TF 1 "register_operand")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")

(define_insn "*<code>tf2_1"
  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
	(absneg:TF
	  (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_SSE"
  "#"
  [(set_attr "isa" "noavx,noavx,avx,avx")])

(define_insn "*nabstf2_1"
  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
	(neg:TF
	  (abs:TF
	    (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
  "TARGET_SSE"
  "#"
  [(set_attr "isa" "noavx,noavx,avx,avx")])

;; Special expand pattern to handle integer mode abs

(define_expand "abs<mode>2"
  [(set (match_operand:SWI48x 0 "register_operand")
    (abs:SWI48x
      (match_operand:SWI48x 1 "register_operand")))]
  "TARGET_EXPAND_ABS"
  {
    machine_mode mode = <MODE>mode;

    /* Generate rtx abs using abs (x) = (((signed) x >> (W-1)) ^ x) -
       ((signed) x >> (W-1)) */
    rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
    rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
					 shift_amount, NULL_RTX,
					 0, OPTAB_DIRECT);
    rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
				       operands[0], 0, OPTAB_DIRECT);
    rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
					 operands[0], 0, OPTAB_DIRECT);
    if (!rtx_equal_p (minus_dst, operands[0]))
      emit_move_insn (operands[0], minus_dst);
    DONE;
  })

(define_expand "<code><mode>2"
  [(set (match_operand:X87MODEF 0 "register_operand")
	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

;; Changing of sign for FP values is doable using integer unit too.
(define_insn "*<code><mode>2_i387_1"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
	(absneg:X87MODEF
	  (match_operand:X87MODEF 1 "register_operand" "0,0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387"
  "#")

(define_split
  [(set (match_operand:X87MODEF 0 "fp_register_operand")
	(absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])

(define_split
  [(set (match_operand:X87MODEF 0 "general_reg_operand")
	(absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(const_int 0)]
  "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

(define_insn "*<code><mode>2_1"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
	(absneg:MODEF
	  (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
   (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
  "#"
  [(set_attr "isa" "noavx,noavx,avx,*,*")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "3,4")
	 (symbol_ref "TARGET_MIX_SSE_I387")
	 (const_string "*"))
       (if_then_else
	 (eq_attr "alternative" "3,4")
	 (symbol_ref "true")
	 (symbol_ref "false"))))])

(define_split
  [(set (match_operand:SSEMODEF 0 "sse_reg_operand")
	(absneg:SSEMODEF
	  (match_operand:SSEMODEF 1 "vector_operand")))
   (use (match_operand:<ssevecmodef> 2 "vector_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode)))
   && reload_completed"
  [(set (match_dup 0) (match_dup 3))]
{
  machine_mode mode = <MODE>mode;
  machine_mode vmode = <ssevecmodef>mode;
  enum rtx_code absneg_op = <CODE> == ABS ? AND : XOR;

  operands[0] = lowpart_subreg (vmode, operands[0], mode);
  operands[1] = lowpart_subreg (vmode, operands[1], mode);

  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
        std::swap (operands[1], operands[2]);
    }
  else
   {
     if (operands_match_p (operands[0], operands[2]))
       std::swap (operands[1], operands[2]);
   }

  operands[3]
    = gen_rtx_fmt_ee (absneg_op, vmode, operands[1], operands[2]);
})

(define_split
  [(set (match_operand:MODEF 0 "fp_register_operand")
	(absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
   (use (match_operand 2))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])

(define_split
  [(set (match_operand:MODEF 0 "general_reg_operand")
	(absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
   (use (match_operand 2))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(const_int 0)]
  "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

(define_insn "*nabs<mode>2_1"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
	(neg:MODEF
	  (abs:MODEF
	    (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
   (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "#"
  [(set_attr "isa" "noavx,noavx,avx")])

(define_split
  [(set (match_operand:SSEMODEF 0 "sse_reg_operand")
	(neg:SSEMODEF
	  (abs:SSEMODEF
	    (match_operand:SSEMODEF 1 "vector_operand"))))
   (use (match_operand:<ssevecmodef> 2 "vector_operand"))]
  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode)))
   && reload_completed"
  [(set (match_dup 0) (match_dup 3))]
{
  machine_mode mode = <MODE>mode;
  machine_mode vmode = <ssevecmodef>mode;

  operands[0] = lowpart_subreg (vmode, operands[0], mode);
  operands[1] = lowpart_subreg (vmode, operands[1], mode);

  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
        std::swap (operands[1], operands[2]);
    }
  else
   {
     if (operands_match_p (operands[0], operands[2]))
       std::swap (operands[1], operands[2]);
   }

  operands[3]
    = gen_rtx_fmt_ee (IOR, vmode, operands[1], operands[2]);
})

;; Conditionalize these after reload. If they match before reload, we
;; lose the clobber and ability to use integer instructions.

(define_insn "*<code><mode>2_i387"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
  "TARGET_80387 && reload_completed"
  "<absneg_mnemonic>"
  [(set_attr "type" "fsgn")
   (set_attr "mode" "<MODE>")])

;; Copysign instructions

(define_expand "copysign<mode>3"
  [(match_operand:SSEMODEF 0 "register_operand")
   (match_operand:SSEMODEF 1 "nonmemory_operand")
   (match_operand:SSEMODEF 2 "register_operand")]
  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
   || (TARGET_SSE && (<MODE>mode == TFmode))"
  "ix86_expand_copysign (operands); DONE;")

(define_insn_and_split "@copysign<mode>3_const"
  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
	(unspec:SSEMODEF
	  [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
	   (match_operand:SSEMODEF 2 "register_operand" "0")
	   (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
	  UNSPEC_COPYSIGN))]
  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
   || (TARGET_SSE && (<MODE>mode == TFmode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  "ix86_split_copysign_const (operands); DONE;")

(define_insn "@copysign<mode>3_var"
  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
	(unspec:SSEMODEF
	  [(match_operand:SSEMODEF 2 "register_operand"	"Yv,0,0,Yv,Yv")
	   (match_operand:SSEMODEF 3 "register_operand"	"1,1,Yv,1,Yv")
	   (match_operand:<ssevecmodef> 4
	     "nonimmediate_operand" "X,Yvm,Yvm,0,0")
	   (match_operand:<ssevecmodef> 5
	     "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
	  UNSPEC_COPYSIGN))
   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
   || (TARGET_SSE && (<MODE>mode == TFmode))"
  "#")

(define_split
  [(set (match_operand:SSEMODEF 0 "register_operand")
	(unspec:SSEMODEF
	  [(match_operand:SSEMODEF 2 "register_operand")
	   (match_operand:SSEMODEF 3 "register_operand")
	   (match_operand:<ssevecmodef> 4)
	   (match_operand:<ssevecmodef> 5)]
	  UNSPEC_COPYSIGN))
   (clobber (match_scratch:<ssevecmodef> 1))]
  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode)))
   && reload_completed"
  [(const_int 0)]
  "ix86_split_copysign_var (operands); DONE;")

(define_expand "xorsign<mode>3"
  [(match_operand:MODEF 0 "register_operand")
   (match_operand:MODEF 1 "register_operand")
   (match_operand:MODEF 2 "register_operand")]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "ix86_expand_xorsign (operands); DONE;")

(define_insn_and_split "@xorsign<mode>3_1"
  [(set (match_operand:MODEF 0 "register_operand" "=Yv")
	(unspec:MODEF
	  [(match_operand:MODEF 1 "register_operand" "Yv")
	   (match_operand:MODEF 2 "register_operand" "0")
	   (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm")]
	  UNSPEC_XORSIGN))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  "ix86_split_xorsign (operands); DONE;")

;; One complement instructions

(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
	(not:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")))]
  ""
  "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")

(define_insn_and_split "*one_cmpldi2_doubleword"
  [(set (match_operand:DI 0 "nonimmediate_operand")
	(not:DI (match_operand:DI 1 "nonimmediate_operand")))]
  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
   && ix86_unary_operator_ok (NOT, DImode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(not:SI (match_dup 1)))
   (set (match_dup 2)
	(not:SI (match_dup 3)))]
  "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")

(define_insn "*one_cmpl<mode>2_1"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
	(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
  "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
  "not{<imodesuffix>}\t%0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "<MODE>")])

(define_insn "*one_cmplsi2_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (not:SI (match_operand:SI 1 "register_operand" "0"))))]
  "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
  "not{l}\t%k0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "SI")])

(define_insn "*one_cmplqi2_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
	(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
  "ix86_unary_operator_ok (NOT, QImode, operands)"
  "@
   not{b}\t%0
   not{l}\t%k0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "QI,SI")
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*one_cmpl<mode>2_2"
  [(set (reg FLAGS_REG)
	(compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
		 (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(not:SWI (match_dup 1)))]
  "ix86_match_ccmode (insn, CCNOmode)
   && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
  "#"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")])

(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 2 "compare_operator"
	  [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
	   (const_int 0)]))
   (set (match_operand:SWI 1 "nonimmediate_operand")
	(not:SWI (match_dup 3)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  [(parallel [(set (match_dup 0)
		   (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
				    (const_int 0)]))
	      (set (match_dup 1)
		   (xor:SWI (match_dup 3) (const_int -1)))])])

(define_insn "*one_cmplsi2_2_zext"
  [(set (reg FLAGS_REG)
	(compare (not:SI (match_operand:SI 1 "register_operand" "0"))
		 (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (not:SI (match_dup 1))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
   && ix86_unary_operator_ok (NOT, SImode, operands)"
  "#"
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")])

(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 2 "compare_operator"
	  [(not:SI (match_operand:SI 3 "register_operand"))
	   (const_int 0)]))
   (set (match_operand:DI 1 "register_operand")
	(zero_extend:DI (not:SI (match_dup 3))))]
  "ix86_match_ccmode (insn, CCNOmode)"
  [(parallel [(set (match_dup 0)
		   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
				    (const_int 0)]))
	      (set (match_dup 1)
		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])

;; Shift instructions

;; DImode shifts are implemented using the i386 "shift double" opcode,
;; which is written as "sh[lr]d[lw] imm,reg,reg/mem".  If the shift count
;; is variable, then the count is in %cl and the "imm" operand is dropped
;; from the assembler input.
;;
;; This instruction shifts the target reg/mem as usual, but instead of
;; shifting in zeros, bits are shifted in from reg operand.  If the insn
;; is a left shift double, bits are taken from the high order bits of
;; reg, else if the insn is a shift right double, bits are taken from the
;; low order bits of reg.  So if %eax is "1234" and %edx is "5678",
;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
;;
;; Since sh[lr]d does not change the `reg' operand, that is done
;; separately, making all shifts emit pairs of shift double and normal
;; shift.  Since sh[lr]d does not shift more than 31 bits, and we wish to
;; support a 63 bit shift, each shift where the count is in a reg expands
;; to a pair of shifts, a branch, a shift by 32 and a label.
;;
;; If the shift count is a constant, we need never emit more than one
;; shift pair, instead using moves and sign extension for counts greater
;; than 31.

(define_expand "ashl<mode>3"
  [(set (match_operand:SDWIM 0 "<shift_operand>")
	(ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
		      (match_operand:QI 2 "nonmemory_operand")))]
  ""
  "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")

(define_insn_and_split "*ashl<dwi>3_doubleword_mask"
  [(set (match_operand:<DWI> 0 "register_operand")
	(ashift:<DWI>
	  (match_operand:<DWI> 1 "register_operand")
	  (subreg:QI
	    (and:SI
	      (match_operand:SI 2 "register_operand" "c")
	      (match_operand:SI 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 6)
	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
		     (lshiftrt:DWIH (match_dup 5)
		       (minus:QI (match_dup 8) (match_dup 2)))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 4)
	   (ashift:DWIH (match_dup 5) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (SImode);
      emit_insn (gen_andsi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  operands[2] = gen_lowpart (QImode, operands[2]);

  if (!rtx_equal_p (operands[6], operands[7]))
    emit_move_insn (operands[6], operands[7]);
})

(define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
  [(set (match_operand:<DWI> 0 "register_operand")
	(ashift:<DWI>
	  (match_operand:<DWI> 1 "register_operand")
	  (and:QI
	    (match_operand:QI 2 "register_operand" "c")
	    (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 6)
	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
		     (lshiftrt:DWIH (match_dup 5)
		       (minus:QI (match_dup 8) (match_dup 2)))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 4)
	   (ashift:DWIH (match_dup 5) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (QImode);
      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  if (!rtx_equal_p (operands[6], operands[7]))
    emit_move_insn (operands[6], operands[7]);
})

(define_insn "*ashl<mode>3_doubleword"
  [(set (match_operand:DWI 0 "register_operand" "=&r")
	(ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
		    (match_operand:QI 2 "nonmemory_operand" "<S>c")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  [(set_attr "type" "multi")])

(define_split
  [(set (match_operand:DWI 0 "register_operand")
	(ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
		    (match_operand:QI 2 "nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "epilogue_completed"
  [(const_int 0)]
  "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")

;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium.  But if
;; we have one handy, we won't turn it away.

(define_peephole2
  [(match_scratch:DWIH 3 "r")
   (parallel [(set (match_operand:<DWI> 0 "register_operand")
		   (ashift:<DWI>
		     (match_operand:<DWI> 1 "nonmemory_operand")
		     (match_operand:QI 2 "nonmemory_operand")))
	      (clobber (reg:CC FLAGS_REG))])
   (match_dup 3)]
  "TARGET_CMOVE"
  [(const_int 0)]
  "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")

(define_insn "x86_64_shld"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (ashift:DI (match_dup 0)
		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
		(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
		  (minus:QI (const_int 64) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

(define_insn "x86_shld"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (ashift:SI (match_dup 0)
		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
		(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
		  (minus:QI (const_int 32) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

(define_expand "@x86_shift<mode>_adj_1"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
			     (match_dup 4))
		     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand")
        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
			    (match_operand:SWI48 1 "register_operand")
			    (match_dup 0)))
   (set (match_dup 1)
	(if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
			    (match_operand:SWI48 3 "register_operand")
			    (match_dup 1)))]
  "TARGET_CMOVE"
  "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")

(define_expand "@x86_shift<mode>_adj_2"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmp;

  emit_insn (gen_testqi_ccz_1 (operands[2],
			       GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));

  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label),
			      pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  emit_move_insn (operands[0], operands[1]);
  ix86_expand_clear (operands[1]);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  DONE;
})

;; Avoid useless masking of count operand.
(define_insn_and_split "*ashl<mode>3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(ashift:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (subreg:QI
	    (and:SI
	      (match_operand:SI 2 "register_operand" "c,r")
	      (match_operand:SI 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (ashift:SWI48 (match_dup 1)
			 (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = gen_lowpart (QImode, operands[2]);"
  [(set_attr "isa" "*,bmi2")])

(define_insn_and_split "*ashl<mode>3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(ashift:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (and:QI
	    (match_operand:QI 2 "register_operand" "c,r")
	    (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (ashift:SWI48 (match_dup 1)
			 (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "isa" "*,bmi2")])

(define_insn "*bmi2_ashl<mode>3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
		      (match_operand:SWI48 2 "register_operand" "r")))]
  "TARGET_BMI2"
  "shlx\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "<MODE>")])

(define_insn "*ashl<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{<imodesuffix>}\t%0";
      else
	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
	      (const_string "lea")
	    (eq_attr "alternative" "2")
	      (const_string "ishiftx")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
		      (match_operand 0 "register_operand"))
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
		      (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")

(define_insn "*bmi2_ashlsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
		     (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "shlx\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "SI")])

(define_insn "*ashlsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
	(zero_extend:DI
	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{l}\t%k0, %k0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{l}\t%k0";
      else
	return "sal{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
	      (const_string "lea")
	    (eq_attr "alternative" "2")
	      (const_string "ishiftx")
            (and (match_test "TARGET_DOUBLE_WITH_ADD")
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
		     (match_operand:QI 2 "register_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
  "operands[2] = gen_lowpart (SImode, operands[2]);")

(define_insn "*ashlhi3_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{w}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{w}\t%0";
      else
	return "sal{w}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "1")
	      (const_string "lea")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
		      (match_operand 0 "register_operand"))
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "HI,SI")])

(define_insn "*ashlqi3_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
        return "add{l}\t%k0, %k0";
      else
        return "add{b}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	{
	  if (get_attr_mode (insn) == MODE_SI)
	    return "sal{l}\t%k0";
	  else
	    return "sal{b}\t%0";
	}
      else
	{
	  if (get_attr_mode (insn) == MODE_SI)
	    return "sal{l}\t{%2, %k0|%k0, %2}";
	  else
	    return "sal{b}\t{%2, %0|%0, %2}";
	}
    }
}
  [(set (attr "type")
     (cond [(eq_attr "alternative" "2")
	      (const_string "lea")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
		      (match_operand 0 "register_operand"))
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI,SI,SI")
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
	   (symbol_ref "true")))])

(define_insn "*ashl<mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0")
		      (match_operand:QI 2 "nonmemory_operand" "cI")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{<imodesuffix>}\t%0";
      else
	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
	(ashift:SWI (match_operand:SWI 1 "index_register_operand")
		    (match_operand 2 "const_0_to_3_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
	(mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
  if (<MODE>mode != <LEAMODE>mode)
    {
      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
    }
  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})

;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (ashift:SI (match_operand:SI 1 "index_register_operand")
		     (match_operand 2 "const_0_to_3_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed
   && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
	(zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
{
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})

;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags.  We assume that shifts by constant
;; zero are optimized away.
(define_insn "*ashl<mode>3_cmp"
  [(set (reg FLAGS_REG)
	(compare
	  (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
	  (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(ashift:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
	&& (TARGET_SHIFT1
	    || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{<imodesuffix>}\t%0";
      else
	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
		      (match_operand 0 "register_operand"))
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "*ashlsi3_cmp_zext"
  [(set (reg FLAGS_REG)
	(compare
	  (ashift:SI (match_operand:SI 1 "register_operand" "0")
		     (match_operand:QI 2 "const_1_to_31_operand" "I"))
	  (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && (optimize_function_for_size_p (cfun)
       || !TARGET_PARTIAL_FLAG_REG_STALL
       || (operands[2] == const1_rtx
	   && (TARGET_SHIFT1
	       || TARGET_DOUBLE_WITH_ADD)))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{l}\t%k0, %k0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{l}\t%k0";
      else
	return "sal{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

(define_insn "*ashl<mode>3_cconly"
  [(set (reg FLAGS_REG)
	(compare
	  (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
		      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
	  (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
	&& (TARGET_SHIFT1
	    || TARGET_DOUBLE_WITH_ADD)))
   && ix86_match_ccmode (insn, CCGOCmode)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "sal{<imodesuffix>}\t%0";
      else
	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set (attr "type")
     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
		      (match_operand 0 "register_operand"))
		 (match_operand 2 "const1_operand"))
	      (const_string "alu")
	   ]
	   (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
	    (and (eq_attr "type" "ishift")
		 (and (match_operand 2 "const1_operand")
		      (ior (match_test "TARGET_SHIFT1")
			   (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; See comment above `ashl<mode>3' about how this works.

(define_expand "<shift_insn><mode>3"
  [(set (match_operand:SDWIM 0 "<shift_operand>")
	(any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
			   (match_operand:QI 2 "nonmemory_operand")))]
  ""
  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")

;; Avoid useless masking of count operand.
(define_insn_and_split "*<shift_insn><mode>3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(any_shiftrt:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (subreg:QI
	    (and:SI
	      (match_operand:SI 2 "register_operand" "c,r")
	      (match_operand:SI 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_shiftrt:SWI48 (match_dup 1)
			      (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = gen_lowpart (QImode, operands[2]);"
  [(set_attr "isa" "*,bmi2")])

(define_insn_and_split "*<shift_insn><mode>3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(any_shiftrt:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (and:QI
	    (match_operand:QI 2 "register_operand" "c,r")
	    (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_shiftrt:SWI48 (match_dup 1)
			      (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "isa" "*,bmi2")])

(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask"
  [(set (match_operand:<DWI> 0 "register_operand")
	(any_shiftrt:<DWI>
	  (match_operand:<DWI> 1 "register_operand")
	  (subreg:QI
	    (and:SI
	      (match_operand:SI 2 "register_operand" "c")
	      (match_operand:SI 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
		     (ashift:DWIH (match_dup 7)
		       (minus:QI (match_dup 8) (match_dup 2)))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
	   (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (SImode);
      emit_insn (gen_andsi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  operands[2] = gen_lowpart (QImode, operands[2]);

  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})

(define_insn_and_split "*<shift_insn><dwi>3_doubleword_mask_1"
  [(set (match_operand:<DWI> 0 "register_operand")
	(any_shiftrt:<DWI>
	  (match_operand:<DWI> 1 "register_operand")
	  (and:QI
	    (match_operand:QI 2 "register_operand" "c")
	    (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
		     (ashift:DWIH (match_dup 7)
		       (minus:QI (match_dup 8) (match_dup 2)))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
	   (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
      != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (QImode);
      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})

(define_insn_and_split "*<shift_insn><mode>3_doubleword"
  [(set (match_operand:DWI 0 "register_operand" "=&r")
	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
			 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
  "ix86_split_<shift_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
  [(set_attr "type" "multi")])

;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium.  But if
;; we have one handy, we won't turn it away.

(define_peephole2
  [(match_scratch:DWIH 3 "r")
   (parallel [(set (match_operand:<DWI> 0 "register_operand")
		   (any_shiftrt:<DWI>
		     (match_operand:<DWI> 1 "register_operand")
		     (match_operand:QI 2 "nonmemory_operand")))
	      (clobber (reg:CC FLAGS_REG))])
   (match_dup 3)]
  "TARGET_CMOVE"
  [(const_int 0)]
  "ix86_split_<shift_insn> (operands, operands[3], <DWI>mode); DONE;")

(define_insn "x86_64_shrd"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (lshiftrt:DI (match_dup 0)
		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
		(ashift:DI (match_operand:DI 1 "register_operand" "r")
		  (minus:QI (const_int 64) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

(define_insn "x86_shrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (lshiftrt:SI (match_dup 0)
		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
		(ashift:SI (match_operand:SI 1 "register_operand" "r")
		  (minus:QI (const_int 32) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; Base name for insn mnemonic.
(define_mode_attr cvt_mnemonic
  [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])

(define_insn "ashr<mode>3_cvt"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
	(ashiftrt:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
	  (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
  "@
   <cvt_mnemonic>
   sar{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imovx,ishift")
   (set_attr "prefix_0f" "0,*")
   (set_attr "length_immediate" "0,*")
   (set_attr "modrm" "0,1")
   (set_attr "mode" "<MODE>")])

(define_insn "*ashrsi3_cvt_zext"
  [(set (match_operand:DI 0 "register_operand" "=*d,r")
	(zero_extend:DI
	  (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
		       (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[2]) == 31
   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
  "@
   {cltd|cdq}
   sar{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "imovx,ishift")
   (set_attr "prefix_0f" "0,*")
   (set_attr "length_immediate" "0,*")
   (set_attr "modrm" "0,1")
   (set_attr "mode" "SI")])

(define_expand "@x86_shift<mode>_adj_3"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmp;

  emit_insn (gen_testqi_ccz_1 (operands[2],
			       GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));

  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label),
			      pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
				  GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
  emit_label (label);
  LABEL_NUSES (label) = 1;

  DONE;
})

(define_insn "*bmi2_<shift_insn><mode>3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
			   (match_operand:SWI48 2 "register_operand" "r")))]
  "TARGET_BMI2"
  "<shift>x\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "<MODE>")])

(define_insn "*<shift_insn><mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
	(any_shiftrt:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
      return "#";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "<shift>{<imodesuffix>}\t%0";
      else
	return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2")
   (set_attr "type" "ishift,ishiftx")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
			   (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")

(define_insn "*bmi2_<shift_insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
			  (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "SI")])

(define_insn "*<shift_insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(zero_extend:DI
	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
      return "#";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "<shift>{l}\t%k0";
      else
	return "<shift>{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2")
   (set_attr "type" "ishift,ishiftx")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
			  (match_operand:QI 2 "register_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
  "operands[2] = gen_lowpart (SImode, operands[2]);")

(define_insn "*<shift_insn><mode>3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
	(any_shiftrt:SWI12
	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";
  else
    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "*<shift_insn><mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0")
			   (match_operand:QI 2 "nonmemory_operand" "cI")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";
  else
    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags.  We assume that shifts by constant
;; zero are optimized away.
(define_insn "*<shift_insn><mode>3_cmp"
  [(set (reg FLAGS_REG)
	(compare
	  (any_shiftrt:SWI
	    (match_operand:SWI 1 "nonimmediate_operand" "0")
	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
	  (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
	(any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
	&& TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";
  else
    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "*<shift_insn>si3_cmp_zext"
  [(set (reg FLAGS_REG)
	(compare
	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
			  (match_operand:QI 2 "const_1_to_31_operand" "I"))
	  (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && (optimize_function_for_size_p (cfun)
       || !TARGET_PARTIAL_FLAG_REG_STALL
       || (operands[2] == const1_rtx
	   && TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{l}\t%k0";
  else
    return "<shift>{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

(define_insn "*<shift_insn><mode>3_cconly"
  [(set (reg FLAGS_REG)
	(compare
	  (any_shiftrt:SWI
	    (match_operand:SWI 1 "register_operand" "0")
	    (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
	  (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
	&& TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";
  else
    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Rotate instructions

(define_expand "<rotate_insn>ti3"
  [(set (match_operand:TI 0 "register_operand")
	(any_rotate:TI (match_operand:TI 1 "register_operand")
		       (match_operand:QI 2 "nonmemory_operand")))]
  "TARGET_64BIT"
{
  if (const_1_to_63_operand (operands[2], VOIDmode))
    emit_insn (gen_ix86_<rotate_insn>ti3_doubleword
		(operands[0], operands[1], operands[2]));
  else
    FAIL;

  DONE;
})

(define_expand "<rotate_insn>di3"
  [(set (match_operand:DI 0 "shiftdi_operand")
	(any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
		       (match_operand:QI 2 "nonmemory_operand")))]
 ""
{
  if (TARGET_64BIT)
    ix86_expand_binary_operator (<CODE>, DImode, operands);
  else if (const_1_to_31_operand (operands[2], VOIDmode))
    emit_insn (gen_ix86_<rotate_insn>di3_doubleword
		(operands[0], operands[1], operands[2]));
  else
    FAIL;

  DONE;
})

(define_expand "<rotate_insn><mode>3"
  [(set (match_operand:SWIM124 0 "nonimmediate_operand")
	(any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
			    (match_operand:QI 2 "nonmemory_operand")))]
  ""
  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")

;; Avoid useless masking of count operand.
(define_insn_and_split "*<rotate_insn><mode>3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(any_rotate:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (subreg:QI
	    (and:SI
	      (match_operand:SI 2 "register_operand" "c")
	      (match_operand:SI 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_rotate:SWI48 (match_dup 1)
			     (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = gen_lowpart (QImode, operands[2]);")

(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
	(any_rotate:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand")
	  (and:QI
	    (match_operand:QI 2 "register_operand" "c")
	    (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_rotate:SWI48 (match_dup 1)
			     (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])])

;; Implement rotation using two double-precision
;; shift instructions and a scratch register.

(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
 [(set (match_operand:<DWI> 0 "register_operand" "=r")
       (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
		     (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
  (clobber (reg:CC FLAGS_REG))
  (clobber (match_scratch:DWIH 3 "=&r"))]
 ""
 "#"
 "reload_completed"
 [(set (match_dup 3) (match_dup 4))
  (parallel
   [(set (match_dup 4)
	 (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
		   (lshiftrt:DWIH (match_dup 5)
				  (minus:QI (match_dup 6) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))])
  (parallel
   [(set (match_dup 5)
	 (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
		   (lshiftrt:DWIH (match_dup 3)
				  (minus:QI (match_dup 6) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))])]
{
  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));

  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
})

(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
 [(set (match_operand:<DWI> 0 "register_operand" "=r")
       (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
		       (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
  (clobber (reg:CC FLAGS_REG))
  (clobber (match_scratch:DWIH 3 "=&r"))]
 ""
 "#"
 "reload_completed"
 [(set (match_dup 3) (match_dup 4))
  (parallel
   [(set (match_dup 4)
	 (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
		   (ashift:DWIH (match_dup 5)
				(minus:QI (match_dup 6) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))])
  (parallel
   [(set (match_dup 5)
	 (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2))
		   (ashift:DWIH (match_dup 3)
				(minus:QI (match_dup 6) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))])]
{
  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));

  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
})

(define_mode_attr rorx_immediate_operand
	[(SI "const_0_to_31_operand")
	 (DI "const_0_to_63_operand")])

(define_insn "*bmi2_rorx<mode>3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(rotatert:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	  (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
  "TARGET_BMI2"
  "rorx\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "rotatex")
   (set_attr "mode" "<MODE>")])

(define_insn "*<rotate_insn><mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
	(any_rotate:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ROTATEX:
      return "#";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "<rotate>{<imodesuffix>}\t%0";
      else
	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2")
   (set_attr "type" "rotate,rotatex")
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
	    (and (match_operand 2 "const1_operand")
		 (ior (match_test "TARGET_SHIFT1")
		      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Convert rotate to the rotatex pattern to avoid flags dependency.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
		      (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
{
  int bitsize = GET_MODE_BITSIZE (<MODE>mode);

  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
})

(define_split
  [(set (match_operand:SWI48 0 "register_operand")
	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
			(match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])

(define_insn "*bmi2_rorxsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
		       (match_operand:QI 2 "const_0_to_31_operand" "I"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "rotatex")
   (set_attr "mode" "SI")])

(define_insn "*<rotate_insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(zero_extend:DI
	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_ROTATEX:
      return "#";

    default:
      if (operands[2] == const1_rtx
	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
	return "<rotate>{l}\t%k0";
      else
	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2")
   (set_attr "type" "rotate,rotatex")
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
	    (and (match_operand 2 "const1_operand")
		 (ior (match_test "TARGET_SHIFT1")
		      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert rotate to the rotatex pattern to avoid flags dependency.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
		     (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
{
  int bitsize = GET_MODE_BITSIZE (SImode);

  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
})

(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
		       (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])

(define_insn "*<rotate_insn><mode>3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<rotate>{<imodesuffix>}\t%0";
  else
    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "*<rotate_insn><mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
	(any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0")
			  (match_operand:QI 2 "nonmemory_operand" "cI")))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   && rtx_equal_p (operands[0], operands[1])"
{
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<rotate>{<imodesuffix>}\t%0";
  else
    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
	    (ior (match_test "TARGET_SHIFT1")
		 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_split
 [(set (match_operand:HI 0 "QIreg_operand")
       (any_rotate:HI (match_dup 0) (const_int 8)))
  (clobber (reg:CC FLAGS_REG))]
 "reload_completed
  && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
 [(parallel [(set (strict_low_part (match_dup 0))
		  (bswap:HI (match_dup 0)))
	     (clobber (reg:CC FLAGS_REG))])])

;; Bit set / bit test instructions

;; %%% bts, btr, btc

;; These instructions are *slow* when applied to memory.

(define_code_attr btsc [(ior "bts") (xor "btc")])

(define_insn "*<btsc><mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(any_or:SWI48
	  (ashift:SWI48 (const_int 1)
			(match_operand:QI 2 "register_operand" "r"))
	  (match_operand:SWI48 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT"
  "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "<MODE>")])

;; Avoid useless masking of count operand.
(define_insn_and_split "*<btsc><mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
	(any_or:SWI48
	  (ashift:SWI48
	    (const_int 1)
	    (subreg:QI
	      (and:SI
		(match_operand:SI 1 "register_operand")
		(match_operand:SI 2 "const_int_operand")) 0))
	  (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_or:SWI48
	     (ashift:SWI48 (const_int 1)
			   (match_dup 1))
	     (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[1] = gen_lowpart (QImode, operands[1]);")

(define_insn_and_split "*<btsc><mode>_mask_1"
  [(set (match_operand:SWI48 0 "register_operand")
	(any_or:SWI48
	  (ashift:SWI48
	    (const_int 1)
	    (and:QI
	      (match_operand:QI 1 "register_operand")
	      (match_operand:QI 2 "const_int_operand")))
	  (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (any_or:SWI48
	     (ashift:SWI48 (const_int 1)
			   (match_dup 1))
	     (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])])

(define_insn "*btr<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(and:SWI48
	  (rotate:SWI48 (const_int -2)
			(match_operand:QI 2 "register_operand" "r"))
	(match_operand:SWI48 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT"
  "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "<MODE>")])

;; Avoid useless masking of count operand.
(define_insn_and_split "*btr<mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
	(and:SWI48
	  (rotate:SWI48
	    (const_int -2)
	    (subreg:QI
	      (and:SI
		(match_operand:SI 1 "register_operand")
		(match_operand:SI 2 "const_int_operand")) 0))
	  (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (and:SWI48
	     (rotate:SWI48 (const_int -2)
			   (match_dup 1))
	     (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[1] = gen_lowpart (QImode, operands[1]);")

(define_insn_and_split "*btr<mode>_mask_1"
  [(set (match_operand:SWI48 0 "register_operand")
	(and:SWI48
	  (rotate:SWI48
	    (const_int -2)
	    (and:QI
	      (match_operand:QI 1 "register_operand")
	      (match_operand:QI 2 "const_int_operand")))
	  (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (and:SWI48
	     (rotate:SWI48 (const_int -2)
			   (match_dup 1))
	     (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])])

;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point.  But in 64-bit, we can't hold the
;; relevant immediates within the instruction itself, so operating
;; on bits in the high 32-bits of a register becomes easier.
;;
;; These are slow on Nocona, but fast on Athlon64.  We do require the use
;; of btrq and btcq for corner cases of post-reload expansion of absdf and
;; negdf respectively, so they can never be disabled entirely.

(define_insn "*btsq_imm"
  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
			 (const_int 1)
			 (match_operand 1 "const_0_to_63_operand" "J"))
	(const_int 1))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "bts{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

(define_insn "*btrq_imm"
  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
			 (const_int 1)
			 (match_operand 1 "const_0_to_63_operand" "J"))
	(const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "btr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

(define_insn "*btcq_imm"
  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
			 (const_int 1)
			 (match_operand 1 "const_0_to_63_operand" "J"))
	(not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "btc{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Allow Nocona to avoid these instructions if a register is available.

(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
		     (match_operand:DI 0 "nonimmediate_operand")
		     (const_int 1)
		     (match_operand 1 "const_0_to_63_operand"))
		   (const_int 1))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
		   (ior:DI (match_dup 0) (match_dup 3)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);

  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
		     (match_operand:DI 0 "nonimmediate_operand")
		     (const_int 1)
		     (match_operand 1 "const_0_to_63_operand"))
		   (const_int 0))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
		   (and:DI (match_dup 0) (match_dup 3)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
 
  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
		     (match_operand:DI 0 "nonimmediate_operand")
		     (const_int 1)
		     (match_operand 1 "const_0_to_63_operand"))
	      (not:DI (zero_extract:DI
			(match_dup 0) (const_int 1) (match_dup 1))))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
		   (xor:DI (match_dup 0) (match_dup 3)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);

  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

;; %%% bt

(define_insn "*bt<mode>"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extract:SWI48
	    (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
	    (const_int 1)
	    (match_operand:SI 1 "nonmemory_operand" "r<S>,<S>"))
	  (const_int 0)))]
  ""
{
  switch (get_attr_mode (insn))
    {
    case MODE_SI:
      return "bt{l}\t{%1, %k0|%k0, %1}";

    case MODE_DI:
      return "bt{q}\t{%q1, %0|%0, %q1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set (attr "mode")
	(if_then_else
	  (and (match_test "CONST_INT_P (operands[1])")
	       (match_test "INTVAL (operands[1]) < 32"))
	  (const_string "SI")
	  (const_string "<MODE>")))])

(define_insn_and_split "*jcc_bt<mode>"
  [(set (pc)
  	(if_then_else (match_operator 0 "bt_comparison_operator"
			[(zero_extract:SWI48
			   (match_operand:SWI48 1 "nonimmediate_operand")
			   (const_int 1)
			   (match_operand:SI 2 "nonmemory_operand"))
			 (const_int 0)])
		      (label_ref (match_operand 3))
		      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (CONST_INT_P (operands[2])
       ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
	  && INTVAL (operands[2])
	       >= (optimize_function_for_size_p (cfun) ? 8 : 32))
       : !memory_operand (operands[1], <MODE>mode))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extract:SWI48
	    (match_dup 1)
	    (const_int 1)
	    (match_dup 2))
	  (const_int 0)))
   (set (pc)
	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
		      (label_ref (match_dup 3))
		      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})

(define_insn_and_split "*jcc_bt<mode>_1"
  [(set (pc)
  	(if_then_else (match_operator 0 "bt_comparison_operator"
			[(zero_extract:SWI48
			   (match_operand:SWI48 1 "register_operand")
			   (const_int 1)
			   (zero_extend:SI
			     (match_operand:QI 2 "register_operand")))
			 (const_int 0)])
		      (label_ref (match_operand 3))
		      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extract:SWI48
	    (match_dup 1)
	    (const_int 1)
	    (match_dup 2))
	  (const_int 0)))
   (set (pc)
	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
		      (label_ref (match_dup 3))
		      (pc)))]
{
  operands[2] = lowpart_subreg (SImode, operands[2], QImode);
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})

;; Avoid useless masking of bit offset operand.
(define_insn_and_split "*jcc_bt<mode>_mask"
  [(set (pc)
  	(if_then_else (match_operator 0 "bt_comparison_operator"
			[(zero_extract:SWI48
			   (match_operand:SWI48 1 "register_operand")
			   (const_int 1)
			   (and:SI
			     (match_operand:SI 2 "register_operand")
			     (match_operand 3 "const_int_operand")))])
		      (label_ref (match_operand 4))
		      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (zero_extract:SWI48
	    (match_dup 1)
	    (const_int 1)
	    (match_dup 2))
	  (const_int 0)))
   (set (pc)
	(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
		      (label_ref (match_dup 4))
		      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})

;; Store-flag instructions.

;; For all sCOND expanders, also expand the compare or test insn that
;; generates cc0.  Generate an equality comparison if `seq' or `sne'.

(define_insn_and_split "*setcc_di_1"
  [(set (match_operand:DI 0 "register_operand" "=q")
	(match_operator:DI 1 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))]
  "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

(define_insn_and_split "*setcc_si_1_and"
  [(set (match_operand:SI 0 "register_operand" "=q")
	(match_operator:SI 1 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL
   && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

(define_insn_and_split "*setcc_si_1_movzbl"
  [(set (match_operand:SI 0 "register_operand" "=q")
	(match_operator:SI 1 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))]
  "!TARGET_PARTIAL_REG_STALL
   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

(define_insn "*setcc_qi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
	(match_operator:QI 1 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))]
  ""
  "set%C1\t%0"
  [(set_attr "type" "setcc")
   (set_attr "mode" "QI")])

(define_insn "*setcc_qi_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
	(match_operator:QI 1 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))]
  ""
  "set%C1\t%0"
  [(set_attr "type" "setcc")
   (set_attr "mode" "QI")])

;; In general it is not safe to assume too much about CCmode registers,
;; so simplify-rtx stops when it sees a second one.  Under certain
;; conditions this is safe on x86, so help combine not create
;;
;;	seta	%al
;;	testb	%al, %al
;;	sete	%al

(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
	(ne:QI (match_operator 1 "ix86_comparison_operator"
	         [(reg FLAGS_REG) (const_int 0)])
	    (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
})

(define_split
  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
	(ne:QI (match_operator 1 "ix86_comparison_operator"
	         [(reg FLAGS_REG) (const_int 0)])
	    (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
})

(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
	(eq:QI (match_operator 1 "ix86_comparison_operator"
	         [(reg FLAGS_REG) (const_int 0)])
	    (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  PUT_CODE (operands[1],
	    ix86_reverse_condition (GET_CODE (operands[1]),
				    GET_MODE (XEXP (operands[1], 0))));

  /* Make sure that (a) the CCmode we have for the flags is strong
     enough for the reversed compare or (b) we have a valid FP compare.  */
  if (! ix86_comparison_operator (operands[1], VOIDmode))
    FAIL;
})

(define_split
  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
	(eq:QI (match_operator 1 "ix86_comparison_operator"
	         [(reg FLAGS_REG) (const_int 0)])
	    (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  PUT_CODE (operands[1],
  	    ix86_reverse_condition (GET_CODE (operands[1]),
				    GET_MODE (XEXP (operands[1], 0))));

  /* Make sure that (a) the CCmode we have for the flags is strong
     enough for the reversed compare or (b) we have a valid FP compare.  */
  if (! ix86_comparison_operator (operands[1], VOIDmode))
    FAIL;
})

;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
;; subsequent logical operations are used to imitate conditional moves.
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly.

(define_insn "setcc_<mode>_sse"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
	(match_operator:MODEF 3 "sse_comparison_operator"
	  [(match_operand:MODEF 1 "register_operand" "0,x")
	   (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Basic conditional jump instructions.
;; We ignore the overflow flag for signed branch instructions.

(define_insn "*jcc"
  [(set (pc)
	(if_then_else (match_operator 1 "ix86_comparison_operator"
				      [(reg FLAGS_REG) (const_int 0)])
		      (label_ref (match_operand 0))
		      (pc)))]
  ""
  "%!%+j%C1\t%l0"
  [(set_attr "type" "ibr")
   (set_attr "modrm" "0")
   (set (attr "length")
	(if_then_else
	  (and (ge (minus (match_dup 0) (pc))
		   (const_int -126))
	       (lt (minus (match_dup 0) (pc))
		   (const_int 128)))
	  (const_int 2)
	  (const_int 6)))])

;; In general it is not safe to assume too much about CCmode registers,
;; so simplify-rtx stops when it sees a second one.  Under certain
;; conditions this is safe on x86, so help combine not create
;;
;;	seta	%al
;;	testb	%al, %al
;;	je	Lfoo

(define_split
  [(set (pc)
	(if_then_else (ne (match_operator 0 "ix86_comparison_operator"
				      [(reg FLAGS_REG) (const_int 0)])
			  (const_int 0))
		      (label_ref (match_operand 1))
		      (pc)))]
  ""
  [(set (pc)
	(if_then_else (match_dup 0)
		      (label_ref (match_dup 1))
		      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_MODE (operands[0], VOIDmode);
})

(define_split
  [(set (pc)
	(if_then_else (eq (match_operator 0 "ix86_comparison_operator"
				      [(reg FLAGS_REG) (const_int 0)])
			  (const_int 0))
		      (label_ref (match_operand 1))
		      (pc)))]
  ""
  [(set (pc)
	(if_then_else (match_dup 0)
		      (label_ref (match_dup 1))
		      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_MODE (operands[0], VOIDmode);
  PUT_CODE (operands[0],
  	    ix86_reverse_condition (GET_CODE (operands[0]),
				    GET_MODE (XEXP (operands[0], 0))));

  /* Make sure that (a) the CCmode we have for the flags is strong
     enough for the reversed compare or (b) we have a valid FP compare.  */
  if (! ix86_comparison_operator (operands[0], VOIDmode))
    FAIL;
})

;; Unconditional and other jump instructions

(define_insn "jump"
  [(set (pc)
	(label_ref (match_operand 0)))]
  ""
  "%!jmp\t%l0"
  [(set_attr "type" "ibr")
   (set_attr "modrm" "0")
   (set (attr "length")
	(if_then_else
	  (and (ge (minus (match_dup 0) (pc))
		   (const_int -126))
	       (lt (minus (match_dup 0) (pc))
		   (const_int 128)))
	  (const_int 2)
	  (const_int 5)))])

(define_expand "indirect_jump"
  [(set (pc) (match_operand 0 "indirect_branch_operand"))]
  ""
{
  if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
    operands[0] = convert_memory_address (word_mode, operands[0]);
  cfun->machine->has_local_indirect_jump = true;
})

(define_insn "*indirect_jump"
  [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
  ""
  "* return ix86_output_indirect_jmp (operands[0]);"
  [(set (attr "type")
     (if_then_else (match_test "(cfun->machine->indirect_branch_type
				 != indirect_branch_keep)")
	(const_string "multi")
	(const_string "ibr")))
   (set_attr "length_immediate" "0")])

(define_expand "tablejump"
  [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
	      (use (label_ref (match_operand 1)))])]
  ""
{
  /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
     relative.  Convert the relative address to an absolute address.  */
  if (flag_pic)
    {
      rtx op0, op1;
      enum rtx_code code;

      /* We can't use @GOTOFF for text labels on VxWorks;
	 see gotoff_operand.  */
      if (TARGET_64BIT || TARGET_VXWORKS_RTP)
	{
	  code = PLUS;
	  op0 = operands[0];
	  op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
	}
      else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
	{
	  code = PLUS;
	  op0 = operands[0];
	  op1 = pic_offset_table_rtx;
	}
      else
	{
	  code = MINUS;
	  op0 = pic_offset_table_rtx;
	  op1 = operands[0];
	}

      operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
					 OPTAB_DIRECT);
    }

  if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
    operands[0] = convert_memory_address (word_mode, operands[0]);
  cfun->machine->has_local_indirect_jump = true;
})

(define_insn "*tablejump_1"
  [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
   (use (label_ref (match_operand 1)))]
  ""
  "* return ix86_output_indirect_jmp (operands[0]);"
  [(set (attr "type")
     (if_then_else (match_test "(cfun->machine->indirect_branch_type
				 != indirect_branch_keep)")
	(const_string "multi")
	(const_string "ibr")))
   (set_attr "length_immediate" "0")])

;; Convert setcc + movzbl to xor + setcc if operands don't overlap.

(define_peephole2
  [(set (reg FLAGS_REG) (match_operand 0))
   (set (match_operand:QI 1 "register_operand")
	(match_operator:QI 2 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (set (match_operand 3 "any_QIreg_operand")
	(zero_extend (match_dup 1)))]
  "(peep2_reg_dead_p (3, operands[1])
    || operands_match_p (operands[1], operands[3]))
   && ! reg_overlap_mentioned_p (operands[3], operands[0])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 4) (match_dup 0))
   (set (strict_low_part (match_dup 5))
	(match_dup 2))]
{
  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[5] = gen_lowpart (QImode, operands[3]);
  ix86_expand_clear (operands[3]);
})

(define_peephole2
  [(parallel [(set (reg FLAGS_REG) (match_operand 0))
	      (match_operand 4)])
   (set (match_operand:QI 1 "register_operand")
	(match_operator:QI 2 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (set (match_operand 3 "any_QIreg_operand")
	(zero_extend (match_dup 1)))]
  "(peep2_reg_dead_p (3, operands[1])
    || operands_match_p (operands[1], operands[3]))
   && ! reg_overlap_mentioned_p (operands[3], operands[0])
   && ! reg_overlap_mentioned_p (operands[3], operands[4])
   && ! reg_set_p (operands[3], operands[4])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 5) (match_dup 0))
	      (match_dup 4)])
   (set (strict_low_part (match_dup 6))
	(match_dup 2))]
{
  operands[5] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[6] = gen_lowpart (QImode, operands[3]);
  ix86_expand_clear (operands[3]);
})

(define_peephole2
  [(set (reg FLAGS_REG) (match_operand 0))
   (parallel [(set (reg FLAGS_REG) (match_operand 1))
	      (match_operand 5)])
   (set (match_operand:QI 2 "register_operand")
	(match_operator:QI 3 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (set (match_operand 4 "any_QIreg_operand")
	(zero_extend (match_dup 2)))]
  "(peep2_reg_dead_p (4, operands[2])
    || operands_match_p (operands[2], operands[4]))
   && ! reg_overlap_mentioned_p (operands[4], operands[0])
   && ! reg_overlap_mentioned_p (operands[4], operands[1])
   && ! reg_overlap_mentioned_p (operands[4], operands[5])
   && ! reg_set_p (operands[4], operands[5])
   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 6) (match_dup 0))
   (parallel [(set (match_dup 7) (match_dup 1))
	      (match_dup 5)])
   (set (strict_low_part (match_dup 8))
	(match_dup 3))]
{
  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
  operands[8] = gen_lowpart (QImode, operands[4]);
  ix86_expand_clear (operands[4]);
})

;; Similar, but match zero extend with andsi3.

(define_peephole2
  [(set (reg FLAGS_REG) (match_operand 0))
   (set (match_operand:QI 1 "register_operand")
	(match_operator:QI 2 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
		   (and:SI (match_dup 3) (const_int 255)))
	      (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[1]) == REGNO (operands[3])
   && ! reg_overlap_mentioned_p (operands[3], operands[0])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 4) (match_dup 0))
   (set (strict_low_part (match_dup 5))
	(match_dup 2))]
{
  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[5] = gen_lowpart (QImode, operands[3]);
  ix86_expand_clear (operands[3]);
})

(define_peephole2
  [(parallel [(set (reg FLAGS_REG) (match_operand 0))
	      (match_operand 4)])
   (set (match_operand:QI 1 "register_operand")
	(match_operator:QI 2 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (parallel [(set (match_operand 3 "any_QIreg_operand")
		   (zero_extend (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])]
  "(peep2_reg_dead_p (3, operands[1])
    || operands_match_p (operands[1], operands[3]))
   && ! reg_overlap_mentioned_p (operands[3], operands[0])
   && ! reg_overlap_mentioned_p (operands[3], operands[4])
   && ! reg_set_p (operands[3], operands[4])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 5) (match_dup 0))
	      (match_dup 4)])
   (set (strict_low_part (match_dup 6))
	(match_dup 2))]
{
  operands[5] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[6] = gen_lowpart (QImode, operands[3]);
  ix86_expand_clear (operands[3]);
})

(define_peephole2
  [(set (reg FLAGS_REG) (match_operand 0))
   (parallel [(set (reg FLAGS_REG) (match_operand 1))
	      (match_operand 5)])
   (set (match_operand:QI 2 "register_operand")
	(match_operator:QI 3 "ix86_comparison_operator"
	  [(reg FLAGS_REG) (const_int 0)]))
   (parallel [(set (match_operand 4 "any_QIreg_operand")
		   (zero_extend (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
  "(peep2_reg_dead_p (4, operands[2])
    || operands_match_p (operands[2], operands[4]))
   && ! reg_overlap_mentioned_p (operands[4], operands[0])
   && ! reg_overlap_mentioned_p (operands[4], operands[1])
   && ! reg_overlap_mentioned_p (operands[4], operands[5])
   && ! reg_set_p (operands[4], operands[5])
   && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 6) (match_dup 0))
   (parallel [(set (match_dup 7) (match_dup 1))
	      (match_dup 5)])
   (set (strict_low_part (match_dup 8))
	(match_dup 3))]
{
  operands[6] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  operands[7] = gen_rtx_REG (GET_MODE (operands[1]), FLAGS_REG);
  operands[8] = gen_lowpart (QImode, operands[4]);
  ix86_expand_clear (operands[4]);
})

;; Call instructions.

;; The predicates normally associated with named expanders are not properly
;; checked for calls.  This is a bug in the generic code, but it isn't that
;; easy to fix.  Ignore it for now and be prepared to fix things up.

;; P6 processors will jump to the address after the decrement when %esp
;; is used as a call operand, so they will execute return address as a code.
;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.

;; Register constraint for call instruction.
(define_mode_attr c [(SI "l") (DI "r")])

;; Call subroutine returning no value.

(define_expand "call"
  [(call (match_operand:QI 0)
	 (match_operand 1))
   (use (match_operand 2))]
  ""
{
  ix86_expand_call (NULL, operands[0], operands[1],
		    operands[2], NULL, false);
  DONE;
})

(define_expand "sibcall"
  [(call (match_operand:QI 0)
	 (match_operand 1))
   (use (match_operand 2))]
  ""
{
  ix86_expand_call (NULL, operands[0], operands[1],
		    operands[2], NULL, true);
  DONE;
})

(define_insn "*call"
  [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
	 (match_operand 1))]
  "!SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; This covers both call and sibcall since only GOT slot is allowed.
(define_insn "*call_got_x32"
  [(call (mem:QI (zero_extend:DI
		   (match_operand:SI 0 "GOT_memory_operand" "Bg")))
	 (match_operand 1))]
  "TARGET_X32"
{
  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "call")])

;; Since sibcall never returns, we can only use call-clobbered register
;; as GOT base.
(define_insn "*sibcall_GOT_32"
  [(call (mem:QI
	   (mem:SI (plus:SI
		     (match_operand:SI 0 "register_no_elim_operand" "U")
		     (match_operand:SI 1 "GOT32_symbol_operand"))))
	 (match_operand 2))]
  "!TARGET_MACHO
  && !TARGET_64BIT
  && !TARGET_INDIRECT_BRANCH_REGISTER
  && SIBLING_CALL_P (insn)"
{
  rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
  fnaddr = gen_const_mem (SImode, fnaddr);
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "call")])

(define_insn "*sibcall"
  [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
	 (match_operand 1))]
  "SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_insn "*sibcall_memory"
  [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
	 (match_operand 1))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_peephole2
  [(set (match_operand:W 0 "register_operand")
	(match_operand:W 1 "memory_operand"))
   (call (mem:QI (match_dup 0))
	 (match_operand 3))]
  "!TARGET_X32
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && SIBLING_CALL_P (peep2_next_insn (1))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(call (mem:QI (match_dup 1))
		    (match_dup 3))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_peephole2
  [(set (match_operand:W 0 "register_operand")
	(match_operand:W 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (call (mem:QI (match_dup 0))
	 (match_operand 3))]
  "!TARGET_X32
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && SIBLING_CALL_P (peep2_next_insn (2))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 1))
		    (match_dup 3))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_expand "call_pop"
  [(parallel [(call (match_operand:QI 0)
		    (match_operand:SI 1))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 3)))])]
  "!TARGET_64BIT"
{
  ix86_expand_call (NULL, operands[0], operands[1],
		    operands[2], operands[3], false);
  DONE;
})

(define_insn "*call_pop"
  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
	 (match_operand 1))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 2 "immediate_operand" "i")))]
  "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_insn "*sibcall_pop"
  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
	 (match_operand 1))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 2 "immediate_operand" "i")))]
  "!TARGET_64BIT && SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_insn "*sibcall_pop_memory"
  [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
	 (match_operand 1))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 2 "immediate_operand" "i")))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_64BIT"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
	(match_operand:SI 1 "memory_operand"))
   (parallel [(call (mem:QI (match_dup 0))
		    (match_operand 3))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(call (mem:QI (match_dup 1))
		    (match_dup 3))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_dup 4)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
	(match_operand:SI 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 0))
		    (match_operand 3))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 1))
		    (match_dup 3))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_dup 4)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Combining simple memory jump instruction

(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (set (pc) (match_dup 0))]
  "!TARGET_X32
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && peep2_reg_dead_p (2, operands[0])"
  [(set (pc) (match_dup 1))])

;; Call subroutine, returning value in operand 0

(define_expand "call_value"
  [(set (match_operand 0)
	(call (match_operand:QI 1)
	      (match_operand 2)))
   (use (match_operand 3))]
  ""
{
  ix86_expand_call (operands[0], operands[1], operands[2],
		    operands[3], NULL, false);
  DONE;
})

(define_expand "sibcall_value"
  [(set (match_operand 0)
	(call (match_operand:QI 1)
	      (match_operand 2)))
   (use (match_operand 3))]
  ""
{
  ix86_expand_call (operands[0], operands[1], operands[2],
		    operands[3], NULL, true);
  DONE;
})

(define_insn "*call_value"
  [(set (match_operand 0)
	(call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
	      (match_operand 2)))]
  "!SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; This covers both call and sibcall since only GOT slot is allowed.
(define_insn "*call_value_got_x32"
  [(set (match_operand 0)
	(call (mem:QI
		(zero_extend:DI
		  (match_operand:SI 1 "GOT_memory_operand" "Bg")))
	      (match_operand 2)))]
  "TARGET_X32"
{
  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "callv")])

;; Since sibcall never returns, we can only use call-clobbered register
;; as GOT base.
(define_insn "*sibcall_value_GOT_32"
  [(set (match_operand 0)
        (call (mem:QI
		(mem:SI (plus:SI
			  (match_operand:SI 1 "register_no_elim_operand" "U")
			  (match_operand:SI 2 "GOT32_symbol_operand"))))
	 (match_operand 3)))]
  "!TARGET_MACHO
   && !TARGET_64BIT
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && SIBLING_CALL_P (insn)"
{
  rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
  fnaddr = gen_const_mem (SImode, fnaddr);
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "callv")])

(define_insn "*sibcall_value"
  [(set (match_operand 0)
	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
	      (match_operand 2)))]
  "SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_insn "*sibcall_value_memory"
  [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "memory_operand" "m"))
	      (match_operand 2)))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_peephole2
  [(set (match_operand:W 0 "register_operand")
	(match_operand:W 1 "memory_operand"))
   (set (match_operand 2)
   (call (mem:QI (match_dup 0))
		 (match_operand 3)))]
  "!TARGET_X32
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && SIBLING_CALL_P (peep2_next_insn (1))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(set (match_dup 2)
		   (call (mem:QI (match_dup 1))
			 (match_dup 3)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_peephole2
  [(set (match_operand:W 0 "register_operand")
	(match_operand:W 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (set (match_operand 2)
	(call (mem:QI (match_dup 0))
	      (match_operand 3)))]
  "!TARGET_X32
   && !TARGET_INDIRECT_BRANCH_REGISTER
   && SIBLING_CALL_P (peep2_next_insn (2))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_dup 2)
		   (call (mem:QI (match_dup 1))
			 (match_dup 3)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_expand "call_value_pop"
  [(parallel [(set (match_operand 0)
		   (call (match_operand:QI 1)
			 (match_operand:SI 2)))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4)))])]
  "!TARGET_64BIT"
{
  ix86_expand_call (operands[0], operands[1], operands[2],
		    operands[3], operands[4], false);
  DONE;
})

(define_insn "*call_value_pop"
  [(set (match_operand 0)
	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
	      (match_operand 2)))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 3 "immediate_operand" "i")))]
  "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_insn "*sibcall_value_pop"
  [(set (match_operand 0)
	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
	      (match_operand 2)))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 3 "immediate_operand" "i")))]
  "!TARGET_64BIT && SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_insn "*sibcall_value_pop_memory"
  [(set (match_operand 0)
 	(call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
	      (match_operand 2)))
   (set (reg:SI SP_REG)
	(plus:SI (reg:SI SP_REG)
		 (match_operand:SI 3 "immediate_operand" "i")))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_64BIT"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
	(match_operand:SI 1 "memory_operand"))
   (parallel [(set (match_operand 2)
		   (call (mem:QI (match_dup 0))
			 (match_operand 3)))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(set (match_dup 2)
		   (call (mem:QI (match_dup 1))
			 (match_dup 3)))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_dup 4)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
	(match_operand:SI 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_operand 2)
		   (call (mem:QI (match_dup 0))
			 (match_operand 3)))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
   && !reg_mentioned_p (operands[0],
			CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_dup 2)
		   (call (mem:QI (match_dup 1))
			 (match_dup 3)))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_dup 4)))
	      (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

(define_expand "sibcall_value_pop"
  [(parallel [(set (match_operand 0 "" "")
		   (call (match_operand:QI 1 "" "")
			 (match_operand:SI 2 "" "")))
	      (set (reg:SI SP_REG)
		   (plus:SI (reg:SI SP_REG)
			    (match_operand:SI 4 "" "")))])]
  "!TARGET_64BIT"
{
  ix86_expand_call (operands[0], operands[1], operands[2],
		    operands[3], operands[4], 1);
  DONE;
})


;; Call subroutine returning any type.

(define_expand "untyped_call"
  [(parallel [(call (match_operand 0)
		    (const_int 0))
	      (match_operand 1)
	      (match_operand 2)])]
  ""
{
  int i;

  /* In order to give reg-stack an easier job in validating two
     coprocessor registers as containing a possible return value,
     simply pretend the untyped call returns a complex long double
     value. 

     We can't use SSE_REGPARM_MAX here since callee is unprototyped
     and should have the default ABI.  */

  ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
		     ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
		    operands[0], const0_rtx,
		    GEN_INT ((TARGET_64BIT
			      ? (ix86_abi == SYSV_ABI
				 ? X86_64_SSE_REGPARM_MAX
				 : X86_64_MS_SSE_REGPARM_MAX)
			      : X86_32_SSE_REGPARM_MAX)
		    	     - 1),
		    NULL, false);

  for (i = 0; i < XVECLEN (operands[2], 0); i++)
    {
      rtx set = XVECEXP (operands[2], 0, i);
      emit_move_insn (SET_DEST (set), SET_SRC (set));
    }

  /* The optimizer does not know that the call sets the function value
     registers we stored in the result block.  We avoid problems by
     claiming that all hard registers are used and clobbered at this
     point.  */
  emit_insn (gen_blockage ());

  DONE;
})

;; Prologue and epilogue instructions

;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
;; all of memory.  This blocks insns from being moved across this point.

(define_insn "blockage"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
  ""
  ""
  [(set_attr "length" "0")])

;; Do not schedule instructions accessing memory across this point.

(define_expand "memory_blockage"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*memory_blockage"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
  ""
  [(set_attr "length" "0")])

;; As USE insns aren't meaningful after reload, this is used instead
;; to prevent deleting instructions setting registers for PIC code
(define_insn "prologue_use"
  [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
  ""
  ""
  [(set_attr "length" "0")])

;; Insn emitted into the body of a function to return from a function.
;; This is only done if the function's epilogue is known to be simple.
;; See comments for ix86_can_use_return_insn_p in i386.c.

(define_expand "return"
  [(simple_return)]
  "ix86_can_use_return_insn_p ()"
{
  if (crtl->args.pops_args)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);
      emit_jump_insn (gen_simple_return_pop_internal (popc));
      DONE;
    }
})

;; We need to disable this for TARGET_SEH, as otherwise
;; shrink-wrapped prologue gets enabled too.  This might exceed
;; the maximum size of prologue in unwind information.
;; Also disallow shrink-wrapping if using stack slot to pass the
;; static chain pointer - the first instruction has to be pushl %esi
;; and it can't be moved around, as we use alternate entry points
;; in that case.

(define_expand "simple_return"
  [(simple_return)]
  "!TARGET_SEH && !ix86_static_chain_on_stack"
{
  if (crtl->args.pops_args)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);
      emit_jump_insn (gen_simple_return_pop_internal (popc));
      DONE;
    }
})

(define_insn "simple_return_internal"
  [(simple_return)]
  "reload_completed"
  "* return ix86_output_function_return (false);"
  [(set_attr "length" "1")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

(define_insn "interrupt_return"
  [(simple_return)
   (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
  "reload_completed"
{
  return TARGET_64BIT ? "iretq" : "iret";
})

;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
;; instruction Athlon and K8 have.

(define_insn "simple_return_internal_long"
  [(simple_return)
   (unspec [(const_int 0)] UNSPEC_REP)]
  "reload_completed"
  "* return ix86_output_function_return (true);"
  [(set_attr "length" "2")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "modrm" "0")])

(define_insn_and_split "simple_return_pop_internal"
  [(simple_return)
   (use (match_operand:SI 0 "const_int_operand"))]
  "reload_completed"
  "%!ret\t%0"
  "&& cfun->machine->function_return_type != indirect_branch_keep"
  [(const_int 0)]
  "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
  [(set_attr "length" "3")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "2")
   (set_attr "modrm" "0")])

(define_expand "simple_return_indirect_internal"
  [(parallel
     [(simple_return)
      (use (match_operand 0 "register_operand"))])])

(define_insn "*simple_return_indirect_internal<mode>"
  [(simple_return)
   (use (match_operand:W 0 "register_operand" "r"))]
  "reload_completed"
  "* return ix86_output_indirect_function_return (operands[0]);"
  [(set (attr "type")
     (if_then_else (match_test "(cfun->machine->indirect_branch_type
				 != indirect_branch_keep)")
	(const_string "multi")
	(const_string "ibr")))
   (set_attr "length_immediate" "0")])

(define_insn "nop"
  [(const_int 0)]
  ""
  "nop"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Generate nops.  Operand 0 is the number of nops, up to 8.
(define_insn "nops"
  [(unspec_volatile [(match_operand 0 "const_int_operand")]
		    UNSPECV_NOPS)]
  "reload_completed"
{
  int num = INTVAL (operands[0]);

  gcc_assert (IN_RANGE (num, 1, 8));

  while (num--)
    fputs ("\tnop\n", asm_out_file);

  return "";
}
  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Pad to 16-byte boundary, max skip in op0.  Used to avoid
;; branch prediction penalty for the third jump in a 16-byte
;; block on K8.

(define_insn "pad"
  [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)]
  ""
{
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
  ASM_OUTPUT_MAX_SKIP_PAD (asm_out_file, 4, (int)INTVAL (operands[0]));
#else
  /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
     The align insn is used to avoid 3 jump instructions in the row to improve
     branch prediction and the benefits hardly outweigh the cost of extra 8
     nops on the average inserted by full alignment pseudo operation.  */
#endif
  return "";
}
  [(set_attr "length" "16")])

(define_expand "prologue"
  [(const_int 0)]
  ""
  "ix86_expand_prologue (); DONE;")

(define_expand "set_got"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
      (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT"
{
  if (flag_pic && !TARGET_VXWORKS_RTP)
    ix86_pc_thunk_call_expanded = true;
})

(define_insn "*set_got"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT"
  "* return output_set_got (operands[0], NULL_RTX);"
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

(define_expand "set_got_labelled"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec:SI [(label_ref (match_operand 1))]
		      UNSPEC_SET_GOT))
      (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT"
{
  if (flag_pic && !TARGET_VXWORKS_RTP)
    ix86_pc_thunk_call_expanded = true;
})

(define_insn "*set_got_labelled"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(label_ref (match_operand 1))]
	 UNSPEC_SET_GOT))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT"
  "* return output_set_got (operands[0], operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

(define_insn "set_got_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
  "TARGET_64BIT"
  "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])

(define_insn "set_rip_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
  "TARGET_64BIT"
  "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])

(define_insn "set_got_offset_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI
	  [(label_ref (match_operand 1))]
	  UNSPEC_SET_GOT_OFFSET))]
  "TARGET_LP64"
  "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
  [(set_attr "type" "imov")
   (set_attr "length_immediate" "0")
   (set_attr "length_address" "8")
   (set_attr "mode" "DI")])

(define_expand "epilogue"
  [(const_int 0)]
  ""
  "ix86_expand_epilogue (1); DONE;")

(define_expand "sibcall_epilogue"
  [(const_int 0)]
  ""
  "ix86_expand_epilogue (0); DONE;")

(define_expand "eh_return"
  [(use (match_operand 0 "register_operand"))]
  ""
{
  rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];

  /* Tricky bit: we write the address of the handler to which we will
     be returning into someone else's stack frame, one word below the
     stack address we wish to restore.  */
  tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
  tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
  /* Return address is always in word_mode.  */
  tmp = gen_rtx_MEM (word_mode, tmp);
  if (GET_MODE (ra) != word_mode)
    ra = convert_to_mode (word_mode, ra, 1);
  emit_move_insn (tmp, ra);

  emit_jump_insn (gen_eh_return_internal ());
  emit_barrier ();
  DONE;
})

(define_insn_and_split "eh_return_internal"
  [(eh_return)]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
  "ix86_expand_epilogue (2); DONE;")

(define_expand "@leave_<mode>"
  [(parallel
    [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
     (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
     (clobber (mem:BLK (scratch)))])]
  ""
  "operands[0] = GEN_INT (<MODE_SIZE>);")

(define_insn "*leave"
  [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
   (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "!TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])

(define_insn "*leave_rex64"
  [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
   (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])

;; Handle -fsplit-stack.

(define_expand "split_stack_prologue"
  [(const_int 0)]
  ""
{
  ix86_expand_split_stack_prologue ();
  DONE;
})

;; In order to support the call/return predictor, we use a return
;; instruction which the middle-end doesn't see.
(define_insn "split_stack_return"
  [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
		     UNSPECV_SPLIT_STACK_RETURN)]
  ""
{
  if (operands[0] == const0_rtx)
    return "ret";
  else
    return "ret\t%0";
}
  [(set_attr "atom_unit" "jeu")
   (set_attr "modrm" "0")
   (set (attr "length")
	(if_then_else (match_operand:SI 0 "const0_operand")
		      (const_int 1)
		      (const_int 3)))
   (set (attr "length_immediate")
	(if_then_else (match_operand:SI 0 "const0_operand")
		      (const_int 0)
		      (const_int 2)))])

;; If there are operand 0 bytes available on the stack, jump to
;; operand 1.

(define_expand "split_stack_space_check"
  [(set (pc) (if_then_else
	      (ltu (minus (reg SP_REG)
			  (match_operand 0 "register_operand"))
		   (match_dup 2))
	      (label_ref (match_operand 1))
	      (pc)))]
  ""
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));

  operands[2] = ix86_split_stack_guard ();
  ix86_expand_branch (GEU, reg, operands[2], operands[1]);

  DONE;
})

;; Bit manipulation instructions.

(define_expand "ffs<mode>2"
  [(set (match_dup 2) (const_int -1))
   (parallel [(set (match_dup 3) (match_dup 4))
	      (set (match_operand:SWI48 0 "register_operand")
		   (ctz:SWI48
		     (match_operand:SWI48 1 "nonimmediate_operand")))])
   (set (match_dup 0) (if_then_else:SWI48
			(eq (match_dup 3) (const_int 0))
			(match_dup 2)
			(match_dup 0)))
   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  machine_mode flags_mode;

  if (<MODE>mode == SImode && !TARGET_CMOVE)
    {
      emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
      DONE;
    }

  flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  operands[2] = gen_reg_rtx (<MODE>mode);
  operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
})

(define_insn_and_split "ffssi2_no_cmove"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_scratch:SI 2 "=&q"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_CMOVE"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4) (match_dup 5))
	      (set (match_dup 0) (ctz:SI (match_dup 1)))])
   (set (strict_low_part (match_dup 3))
	(eq:QI (match_dup 4) (const_int 0)))
   (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  operands[3] = gen_lowpart (QImode, operands[2]);
  operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);

  ix86_expand_clear (operands[2]);
})

(define_insn_and_split "*tzcnt<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
		     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
	(ctz:SWI48 (match_dup 1)))]
  "TARGET_BMI"
  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (reg:CCC FLAGS_REG)
	  (compare:CCC (match_dup 1) (const_int 0)))
     (set (match_dup 0)
	  (ctz:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*tzcnt<mode>_1_falsedep"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
		     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
	(ctz:SWI48 (match_dup 1)))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)]
  "TARGET_BMI"
  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bsf<mode>_1"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
		     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
	(ctz:SWI48 (match_dup 1)))]
  ""
  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "ctz<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(ctz:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_BMI)
    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
  else if (optimize_function_for_size_p (cfun))
    ;
  else if (TARGET_GENERIC)
    /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
    return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
}
  "(TARGET_BMI || TARGET_GENERIC)
   && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
	  (ctz:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set (attr "prefix_rep")
     (if_then_else
       (ior (match_test "TARGET_BMI")
	    (and (not (match_test "optimize_function_for_size_p (cfun)"))
		 (match_test "TARGET_GENERIC")))
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*ctz<mode>2_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(ctz:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_BMI)
    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
  else if (TARGET_GENERIC)
    /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
    return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
  else
    gcc_unreachable ();
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])

(define_insn "bsr_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(minus:DI (const_int 63)
		  (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "bsr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "DI")])

(define_insn "bsr"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (const_int 31)
		  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "bsr{l}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "SI")])

(define_insn "*bsrhi"
  [(set (match_operand:HI 0 "register_operand" "=r")
	(minus:HI (const_int 15)
		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "bsr{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "HI")])

(define_expand "clz<mode>2"
  [(parallel
     [(set (match_operand:SWI48 0 "register_operand")
	   (minus:SWI48
	     (match_dup 2)
	     (clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand"))))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_LZCNT)
    {
      emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
      DONE;
    }
  operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
})

(define_insn_and_split "clz<mode>2_lzcnt"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(clz:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
	  (clz:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*clz<mode>2_lzcnt_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(clz:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_int_iterator LT_ZCNT
	[(UNSPEC_TZCNT "TARGET_BMI")
	 (UNSPEC_LZCNT "TARGET_LZCNT")])

(define_int_attr lt_zcnt
	[(UNSPEC_TZCNT "tzcnt")
	 (UNSPEC_LZCNT "lzcnt")])

(define_int_attr lt_zcnt_type
	[(UNSPEC_TZCNT "alu1")
	 (UNSPEC_LZCNT "bitmanip")])

;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
;; provides operand size as output when source operand is zero. 

(define_insn_and_split "<lt_zcnt>_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
	  (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*<lt_zcnt>_<mode>_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])

(define_insn "<lt_zcnt>_hi"
  [(set (match_operand:HI 0 "register_operand" "=r")
	(unspec:HI
	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "HI")])

;; BMI instructions.

(define_insn "bmi_bextr_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
		       (match_operand:SWI48 2 "register_operand" "r,r")]
		      UNSPEC_BEXTR))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_bextr_<mode>_ccz"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
			 (match_operand:SWI48 2 "register_operand" "r,r")]
			UNSPEC_BEXTR)
	  (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r,r"))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_blsi_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (neg:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_blsmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (xor:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_blsr_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI"
   "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_blsr_<mode>_cmp"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (and:SWI48
	    (plus:SWI48
	      (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	      (const_int -1))
	    (match_dup 1))
	  (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
	(and:SWI48
	  (plus:SWI48
	    (match_dup 1)
	    (const_int -1))
	  (match_dup 1)))]
   "TARGET_BMI"
   "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi_blsr_<mode>_ccz"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (and:SWI48
	    (plus:SWI48
	      (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	      (const_int -1))
	    (match_dup 1))
	  (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
   "TARGET_BMI"
   "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; BMI2 instructions.
(define_expand "bmi2_bzhi_<mode>3"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
	  (if_then_else:SWI48
	    (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand")
			      (const_int 255))
		   (const_int 0))
	    (zero_extract:SWI48
	      (match_operand:SWI48 1 "nonimmediate_operand")
	      (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
			  (match_dup 3))
	      (const_int 0))
	    (const_int 0)))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_BMI2"
  "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);")

(define_insn "*bmi2_bzhi_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(if_then_else:SWI48
	  (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
			    (const_int 255))
		 (const_int 0))
	  (zero_extract:SWI48
	    (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	    (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
			(match_operand:SWI48 3 "const_int_operand" "n"))
	    (const_int 0))
	  (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "bzhi\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi2_bzhi_<mode>3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(if_then_else:SWI48
	  (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
	  (zero_extract:SWI48
	    (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	    (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
			(match_operand:SWI48 3 "const_int_operand" "n"))
	    (const_int 0))
	  (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi2_bzhi_<mode>3_1_ccz"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (if_then_else:SWI48
	    (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
	    (zero_extract:SWI48
	      (match_operand:SWI48 1 "nonimmediate_operand" "rm")
	      (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
			  (match_operand:SWI48 3 "const_int_operand" "n"))
	      (const_int 0))
	    (const_int 0))
	(const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi2_bzhi_<mode>3_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(and:SWI48
	  (plus:SWI48
	    (ashift:SWI48 (const_int 1)
			  (match_operand:QI 2 "register_operand" "r"))
	    (const_int -1))
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*bmi2_bzhi_<mode>3_3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(and:SWI48
	  (not:SWI48
	    (ashift:SWI48 (const_int -1)
			  (match_operand:QI 2 "register_operand" "r")))
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "bmi2_pdep_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
                       (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
                       UNSPEC_PDEP))]
  "TARGET_BMI2"
  "pdep\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

(define_insn "bmi2_pext_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
                       (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
                       UNSPEC_PEXT))]
  "TARGET_BMI2"
  "pext\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; TBM instructions.
(define_expand "tbm_bextri_<mode>"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
	  (zero_extract:SWI48
	    (match_operand:SWI48 1 "nonimmediate_operand")
	    (match_operand 2 "const_0_to_255_operand" "N")
	    (match_operand 3 "const_0_to_255_operand" "N")))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_TBM"
{
  if (operands[2] == const0_rtx
      || INTVAL (operands[3]) >= <MODE_SIZE> * BITS_PER_UNIT)
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }
  if (INTVAL (operands[2]) + INTVAL (operands[3])
      > <MODE_SIZE> * BITS_PER_UNIT)
    operands[2] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - INTVAL (operands[3]));
})

(define_insn "*tbm_bextri_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extract:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
          (match_operand 2 "const_0_to_255_operand" "N")
          (match_operand 3 "const_0_to_255_operand" "N")))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
  return "bextr\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blcfill_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blcfill\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blci_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (not:SWI48
            (plus:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
              (const_int 1)))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blci\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blcic_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blcic\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blcmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (xor:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blcmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blcs_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blcs\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blsfill_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blsfill\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_blsic_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "blsic\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_t1mskc_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "t1mskc\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn "*tbm_tzmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
   "TARGET_TBM"
   "tzmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "popcount<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(popcount:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
#endif
}
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
	  (popcount:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*popcount<mode>2_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(popcount:SWI48
	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "*popcountsi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(and:DI
	  (subreg:DI
	    (popcount:SI
	      (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
	  (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
	  (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*popcountsi2_zext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(and:DI
	  (subreg:DI
	    (popcount:SI
	      (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
	  (const_int 63)))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
	   UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

(define_insn_and_split "*popcounthi2_1"
  [(set (match_operand:SI 0 "register_operand")
	(popcount:SI
	  (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx tmp = gen_reg_rtx (HImode);

  emit_insn (gen_popcounthi2 (tmp, operands[1]));
  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
  DONE;
})

(define_insn "popcounthi2"
  [(set (match_operand:HI 0 "register_operand" "=r")
	(popcount:HI
	  (match_operand:HI 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{w}\t{%1, %0|%0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "HI")])

(define_expand "bswapdi2"
  [(set (match_operand:DI 0 "register_operand")
	(bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT"
{
  if (!TARGET_MOVBE)
    operands[1] = force_reg (DImode, operands[1]);
})

(define_expand "bswapsi2"
  [(set (match_operand:SI 0 "register_operand")
	(bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_MOVBE)
    ;
  else if (TARGET_BSWAP)
    operands[1] = force_reg (SImode, operands[1]);
  else
    {
      rtx x = operands[0];

      emit_move_insn (x, operands[1]);
      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
      emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
      DONE;
    }
})

(define_insn "*bswap<mode>2_movbe"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
	(bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
  "TARGET_MOVBE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
    bswap\t%0
    movbe{<imodesuffix>}\t{%1, %0|%0, %1}
    movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip,imov,imov")
   (set_attr "modrm" "0,1,1")
   (set_attr "prefix_0f" "*,1,1")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "mode" "<MODE>")])

(define_insn "*bswap<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
  "TARGET_BSWAP"
  "bswap\t%0"
  [(set_attr "type" "bitmanip")
   (set_attr "modrm" "0")
   (set_attr "mode" "<MODE>")])

(define_expand "bswaphi2"
  [(set (match_operand:HI 0 "register_operand")
	(bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
  "TARGET_MOVBE")

(define_insn "*bswaphi2_movbe"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
	(bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
  "TARGET_MOVBE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
    xchg{b}\t{%h0, %b0|%b0, %h0}
    movbe{w}\t{%1, %0|%0, %1}
    movbe{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "*,1,1")
   (set_attr "prefix_0f" "*,1,1")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "pent_pair" "np,*,*")
   (set_attr "athlon_decode" "vector,*,*")
   (set_attr "amdfam10_decode" "double,*,*")
   (set_attr "bdver1_decode" "double,*,*")
   (set_attr "mode" "QI,HI,HI")])

(define_peephole2
  [(set (match_operand:HI 0 "general_reg_operand")
	(bswap:HI (match_dup 0)))]
  "TARGET_MOVBE
   && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_insn "bswaphi_lowpart"
  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
	(bswap:HI (match_dup 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@
    xchg{b}\t{%h0, %b0|%b0, %h0}
    rol{w}\t{$8, %0|%0, 8}"
  [(set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "true")]
	   (symbol_ref "false")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "TARGET_USE_XCHGB")]
	   (symbol_ref "!TARGET_USE_XCHGB")))
   (set_attr "length" "2,4")
   (set_attr "mode" "QI,HI")])

(define_expand "paritydi2"
  [(set (match_operand:DI 0 "register_operand")
	(parity:DI (match_operand:DI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  rtx scratch = gen_reg_rtx (QImode);

  emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
				NULL_RTX, operands[1]));

  ix86_expand_setcc (scratch, ORDERED,
		     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);

  if (TARGET_64BIT)
    emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (tmp, scratch));
      emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
    }
  DONE;
})

(define_expand "paritysi2"
  [(set (match_operand:SI 0 "register_operand")
	(parity:SI (match_operand:SI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  rtx scratch = gen_reg_rtx (QImode);

  emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));

  ix86_expand_setcc (scratch, ORDERED,
		     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);

  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
  DONE;
})

(define_insn_and_split "paritydi2_cmp"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:DI 3 "register_operand" "0")]
		   UNSPEC_PARITY))
   (clobber (match_scratch:DI 0 "=r"))
   (clobber (match_scratch:SI 1 "=&r"))
   (clobber (match_scratch:HI 2 "=Q"))]
  "! TARGET_POPCNT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 1)
	   (xor:SI (match_dup 1) (match_dup 4)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (reg:CC FLAGS_REG)
	   (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
      (clobber (match_dup 1))
      (clobber (match_dup 2))])]
{
  operands[4] = gen_lowpart (SImode, operands[3]);

  if (TARGET_64BIT)
    {
      emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
      emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
    }
  else
    operands[1] = gen_highpart (SImode, operands[3]);
})

(define_insn_and_split "paritysi2_cmp"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:SI 2 "register_operand" "0")]
		   UNSPEC_PARITY))
   (clobber (match_scratch:SI 0 "=r"))
   (clobber (match_scratch:HI 1 "=&Q"))]
  "! TARGET_POPCNT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 1)
	   (xor:HI (match_dup 1) (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (reg:CC FLAGS_REG)
	   (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
      (clobber (match_dup 1))])]
{
  operands[3] = gen_lowpart (HImode, operands[2]);

  emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
  emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
})

(define_insn "*parityhi2_cmp"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:HI 1 "register_operand" "0")]
		   UNSPEC_PARITY))
   (clobber (match_scratch:HI 0 "=Q"))]
  "! TARGET_POPCNT"
  "xor{b}\t{%h0, %b0|%b0, %h0}"
  [(set_attr "length" "2")
   (set_attr "mode" "HI")])


;; Thread-local storage patterns for ELF.
;;
;; Note that these code sequences must appear exactly as shown
;; in order to allow linker relaxation.

(define_insn "*tls_global_dynamic_32_gnu"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "Yb")
	  (match_operand 2 "tls_symbolic_operand")
	  (match_operand 3 "constant_call_address_operand" "Bz")
	  (reg:SI SP_REG)]
	 UNSPEC_TLS_GD))
   (clobber (match_scratch:SI 4 "=d"))
   (clobber (match_scratch:SI 5 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU_TLS"
{
  if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    output_asm_insn
      ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
  else
    output_asm_insn
      ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
  if (TARGET_SUN_TLS)
#ifdef HAVE_AS_IX86_TLSGDPLT
    return "call\t%a2@tlsgdplt";
#else
    return "call\t%p3@plt";
#endif
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P3";
  return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

(define_expand "tls_global_dynamic_32"
  [(parallel
    [(set (match_operand:SI 0 "register_operand")
	  (unspec:SI [(match_operand:SI 2 "register_operand")
		      (match_operand 1 "tls_symbolic_operand")
		      (match_operand 3 "constant_call_address_operand")
		      (reg:SI SP_REG)]
		     UNSPEC_TLS_GD))
     (clobber (match_scratch:SI 4))
     (clobber (match_scratch:SI 5))
     (clobber (reg:CC FLAGS_REG))])]
  ""
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

(define_insn "*tls_global_dynamic_64_<mode>"
  [(set (match_operand:P 0 "register_operand" "=a")
	(call:P
	 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
	 (match_operand 3)))
   (unspec:P [(match_operand 1 "tls_symbolic_operand")
	      (reg:P SP_REG)]
	     UNSPEC_TLS_GD)]
  "TARGET_64BIT"
{
  if (!TARGET_X32)
    /* The .loc directive has effect for 'the immediately following assembly
       instruction'.  So for a sequence:
         .loc f l
         .byte x
         insn1
       the 'immediately following assembly instruction' is insn1.
       We want to emit an insn prefix here, but if we use .byte (as shown in
       'ELF Handling For Thread-Local Storage'), a preceding .loc will point
       inside the insn sequence, rather than to the start.  After relaxation
       of the sequence by the linker, the .loc might point inside an insn.
       Use data16 prefix instead, which doesn't have this problem.  */
    fputs ("\tdata16", asm_out_file);
  output_asm_insn
    ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
  if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    fputs (ASM_SHORT "0x6666\n", asm_out_file);
  else
    fputs (ASM_BYTE "0x66\n", asm_out_file);
  fputs ("\trex64\n", asm_out_file);
  if (TARGET_SUN_TLS)
    return "call\t%p2@plt";
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P2";
  return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
  [(set_attr "type" "multi")
   (set (attr "length")
	(symbol_ref "TARGET_X32 ? 15 : 16"))])

(define_insn "*tls_global_dynamic_64_largepic"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(call:DI
	 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
			  (match_operand:DI 3 "immediate_operand" "i")))
	 (match_operand 4)))
   (unspec:DI [(match_operand 1 "tls_symbolic_operand")
	       (reg:DI SP_REG)]
	      UNSPEC_TLS_GD)]
  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
   && GET_CODE (operands[3]) == CONST
   && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
   && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
{
  output_asm_insn
    ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
  output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
  output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
  return "call\t{*%%rax|rax}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "22")])

(define_expand "@tls_global_dynamic_64_<mode>"
  [(parallel
    [(set (match_operand:P 0 "register_operand")
	  (call:P
	   (mem:QI (match_operand 2))
	   (const_int 0)))
     (unspec:P [(match_operand 1 "tls_symbolic_operand")
		(reg:P SP_REG)]
	       UNSPEC_TLS_GD)])]
  "TARGET_64BIT"
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

(define_insn "*tls_local_dynamic_base_32_gnu"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "Yb")
	  (match_operand 2 "constant_call_address_operand" "Bz")
	  (reg:SI SP_REG)]
	 UNSPEC_TLS_LD_BASE))
   (clobber (match_scratch:SI 3 "=d"))
   (clobber (match_scratch:SI 4 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU_TLS"
{
  output_asm_insn
    ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
  if (TARGET_SUN_TLS)
    {
      if (HAVE_AS_IX86_TLSLDMPLT)
	return "call\t%&@tlsldmplt";
      else
	return "call\t%p2@plt";
    }
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P2";
  return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "11")])

(define_expand "tls_local_dynamic_base_32"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec:SI
	    [(match_operand:SI 1 "register_operand")
	     (match_operand 2 "constant_call_address_operand")
	     (reg:SI SP_REG)]
	    UNSPEC_TLS_LD_BASE))
      (clobber (match_scratch:SI 3))
      (clobber (match_scratch:SI 4))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

(define_insn "*tls_local_dynamic_base_64_<mode>"
  [(set (match_operand:P 0 "register_operand" "=a")
	(call:P
	 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
	 (match_operand 2)))
   (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
  "TARGET_64BIT"
{
  output_asm_insn
    ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
  if (TARGET_SUN_TLS)
    return "call\t%p1@plt";
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P1";
  return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

(define_insn "*tls_local_dynamic_base_64_largepic"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(call:DI
	 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
			  (match_operand:DI 2 "immediate_operand" "i")))
	 (match_operand 3)))
   (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
   && GET_CODE (operands[2]) == CONST
   && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
   && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
{
  output_asm_insn
    ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
  output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
  output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
  return "call\t{*%%rax|rax}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "22")])

(define_expand "@tls_local_dynamic_base_64_<mode>"
  [(parallel
     [(set (match_operand:P 0 "register_operand")
	   (call:P
	    (mem:QI (match_operand 1))
	    (const_int 0)))
      (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
  "TARGET_64BIT"
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

;; Local dynamic of a single variable is a lose.  Show combine how
;; to convert that back to global dynamic.

(define_insn_and_split "*tls_local_dynamic_32_once"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(plus:SI
	 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
		     (match_operand 2 "constant_call_address_operand" "Bz")
		     (reg:SI SP_REG)]
		    UNSPEC_TLS_LD_BASE)
	 (const:SI (unspec:SI
		    [(match_operand 3 "tls_symbolic_operand")]
		    UNSPEC_DTPOFF))))
   (clobber (match_scratch:SI 4 "=d"))
   (clobber (match_scratch:SI 5 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
	   (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
		       (reg:SI SP_REG)]
		      UNSPEC_TLS_GD))
      (clobber (match_dup 4))
      (clobber (match_dup 5))
      (clobber (reg:CC FLAGS_REG))])])

;; Load and add the thread base pointer from %<tp_seg>:0.
(define_insn_and_split "*load_tp_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
	(unspec:PTR [(const_int 0)] UNSPEC_TP))]
  ""
  "#"
  ""
  [(set (match_dup 0)
	(match_dup 1))]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
  set_mem_addr_space (operands[1], as);
})

(define_insn_and_split "*load_tp_x32_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(const_int 0)] UNSPEC_TP)))]
  "TARGET_X32"
  "#"
  ""
  [(set (match_dup 0)
	(zero_extend:DI (match_dup 1)))]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[1] = gen_const_mem (SImode, const0_rtx);
  set_mem_addr_space (operands[1], as);
})

(define_insn_and_split "*add_tp_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
	(plus:PTR
	  (unspec:PTR [(const_int 0)] UNSPEC_TP)
	  (match_operand:PTR 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
	   (plus:PTR (match_dup 1) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
  set_mem_addr_space (operands[2], as);
})

(define_insn_and_split "*add_tp_x32_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
		   (match_operand:SI 1 "register_operand" "0"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_X32"
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
     	   (zero_extend:DI
	     (plus:SI (match_dup 1) (match_dup 2))))
      (clobber (reg:CC FLAGS_REG))])]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[2] = gen_const_mem (SImode, const0_rtx);
  set_mem_addr_space (operands[2], as);
})

;; The Sun linker took the AMD64 TLS spec literally and can only handle
;; %rax as destination of the initial executable code sequence.
(define_insn "tls_initial_exec_64_sun"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(unspec:DI
	 [(match_operand 1 "tls_symbolic_operand")]
	 UNSPEC_TLS_IE_SUN))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_SUN_TLS"
{
  output_asm_insn
    ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
  return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
}
  [(set_attr "type" "multi")])

;; GNU2 TLS patterns can be split.

(define_expand "tls_dynamic_gnu2_32"
  [(set (match_dup 3)
	(plus:SI (match_operand:SI 2 "register_operand")
		 (const:SI
		  (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
			     UNSPEC_TLSDESC))))
   (parallel
    [(set (match_operand:SI 0 "register_operand")
	  (unspec:SI [(match_dup 1) (match_dup 3)
		      (match_dup 2) (reg:SI SP_REG)]
		      UNSPEC_TLSDESC))
     (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
{
  operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
  ix86_tls_descriptor_calls_expanded_in_cfun = true;
})

(define_insn "*tls_dynamic_gnu2_lea_32"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI (match_operand:SI 1 "register_operand" "b")
		 (const:SI
		  (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
			      UNSPEC_TLSDESC))))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")
   (set_attr "length" "6")
   (set_attr "length_address" "4")])

(define_insn "*tls_dynamic_gnu2_call_32"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec:SI [(match_operand 1 "tls_symbolic_operand")
		    (match_operand:SI 2 "register_operand" "0")
		    ;; we have to make sure %ebx still points to the GOT
		    (match_operand:SI 3 "register_operand" "b")
		    (reg:SI SP_REG)]
		   UNSPEC_TLSDESC))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
  [(set_attr "type" "call")
   (set_attr "length" "2")
   (set_attr "length_address" "0")])

(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
  [(set (match_operand:SI 0 "register_operand" "=&a")
	(plus:SI
	 (unspec:SI [(match_operand 3 "tls_modbase_operand")
		     (match_operand:SI 4)
		     (match_operand:SI 2 "register_operand" "b")
		     (reg:SI SP_REG)]
		    UNSPEC_TLSDESC)
	 (const:SI (unspec:SI
		    [(match_operand 1 "tls_symbolic_operand")]
		    UNSPEC_DTPOFF))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "#"
  ""
  [(set (match_dup 0) (match_dup 5))]
{
  operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
  emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
})

(define_expand "@tls_dynamic_gnu2_64_<mode>"
  [(set (match_dup 2)
	(unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
		    UNSPEC_TLSDESC))
   (parallel
    [(set (match_operand:PTR 0 "register_operand")
	  (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
		      UNSPEC_TLSDESC))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && TARGET_GNU2_TLS"
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
  ix86_tls_descriptor_calls_expanded_in_cfun = true;
})

(define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
	(unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
		    UNSPEC_TLSDESC))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
  [(set_attr "type" "lea")
   (set_attr "mode" "<MODE>")
   (set_attr "length" "7")
   (set_attr "length_address" "4")])

(define_insn "*tls_dynamic_gnu2_call_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=a")
	(unspec:PTR [(match_operand 1 "tls_symbolic_operand")
		   (match_operand:PTR 2 "register_operand" "0")
		   (reg:PTR SP_REG)]
		  UNSPEC_TLSDESC))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
  [(set_attr "type" "call")
   (set_attr "length" "2")
   (set_attr "length_address" "0")])

(define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=&a")
	(plus:PTR
	 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
		      (match_operand:PTR 3)
		      (reg:PTR SP_REG)]
		     UNSPEC_TLSDESC)
	 (const:PTR (unspec:PTR
		     [(match_operand 1 "tls_symbolic_operand")]
		     UNSPEC_DTPOFF))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "#"
  ""
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
  emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
})

(define_split
  [(match_operand 0 "tls_address_pattern")]
  "TARGET_TLS_DIRECT_SEG_REFS"
  [(match_dup 0)]
  "operands[0] = ix86_rewrite_tls_address (operands[0]);")


;; These patterns match the binary 387 instructions for addM3, subM3,
;; mulM3 and divM3.  There are three patterns for each of DFmode and
;; SFmode.  The first is the normal insn, the second the same insn but
;; with one operand a conversion, and the third the same insn but with
;; the other operand a conversion.  The conversion may be SFmode or
;; SImode if the target mode DFmode, but only SImode if the target mode
;; is SFmode.

;; Gcc is slightly more smart about handling normal two address instructions
;; so use special patterns for add and mull.

(define_insn "*fop_xf_comm_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(match_operator:XF 3 "binary_fp_operator"
			[(match_operand:XF 1 "register_operand" "%0")
			 (match_operand:XF 2 "register_operand" "f")]))]
  "TARGET_80387
   && COMMUTATIVE_ARITH_P (operands[3])"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(if_then_else (match_operand:XF 3 "mult_operator")
	   (const_string "fmul")
	   (const_string "fop")))
   (set_attr "mode" "XF")])

(define_insn "*fop_<mode>_comm"
  [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
	(match_operator:MODEF 3 "binary_fp_operator"
	  [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
	   (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
   && COMMUTATIVE_ARITH_P (operands[3])
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(if_then_else (eq_attr "alternative" "1,2")
	   (if_then_else (match_operand:MODEF 3 "mult_operator")
	      (const_string "ssemul")
	      (const_string "sseadd"))
	   (if_then_else (match_operand:MODEF 3 "mult_operator")
	      (const_string "fmul")
	      (const_string "fop"))))
   (set_attr "isa" "*,noavx,avx")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "<MODE>")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "TARGET_MIX_SSE_I387
		      && X87_ENABLE_ARITH (<MODE>mode)")
	 (const_string "*"))
       (if_then_else
	 (eq_attr "alternative" "0")
	 (symbol_ref "true")
	 (symbol_ref "false"))))])

(define_insn "*rcpsf2_sse"
  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
		   UNSPEC_RCP))]
  "TARGET_SSE && TARGET_SSE_MATH"
  "@
   %vrcpss\t{%d1, %0|%0, %d1}
   %vrcpss\t{%d1, %0|%0, %d1}
   %vrcpss\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SF")
   (set_attr "avx_partial_xmm_update" "false,false,true")
   (set (attr "preferred_for_speed")
      (cond [(match_test "TARGET_AVX")
	       (symbol_ref "true")
	     (eq_attr "alternative" "1,2")
	       (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
	    ]
	    (symbol_ref "true")))])

(define_insn "*fop_xf_1_i387"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
	(match_operator:XF 3 "binary_fp_operator"
			[(match_operand:XF 1 "register_operand" "0,f")
			 (match_operand:XF 2 "register_operand" "f,0")]))]
  "TARGET_80387
   && !COMMUTATIVE_ARITH_P (operands[3])"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(if_then_else (match_operand:XF 3 "div_operator")
	   (const_string "fdiv")
	   (const_string "fop")))
   (set_attr "mode" "XF")])

(define_insn "*fop_<mode>_1"
  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
	(match_operator:MODEF 3 "binary_fp_operator"
	  [(match_operand:MODEF 1
	     "x87nonimm_ssenomem_operand" "0,fm,0,v")
	   (match_operand:MODEF 2
	     "nonimmediate_operand"	  "fm,0,xm,vm")]))]
  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
   && !COMMUTATIVE_ARITH_P (operands[3])
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(if_then_else (eq_attr "alternative" "2,3")
	   (if_then_else (match_operand:MODEF 3 "div_operator")
	      (const_string "ssediv")
	      (const_string "sseadd"))
	   (if_then_else (match_operand:MODEF 3 "div_operator")
	      (const_string "fdiv")
	      (const_string "fop"))))
   (set_attr "isa" "*,*,noavx,avx")
   (set_attr "prefix" "orig,orig,orig,vex")
   (set_attr "mode" "<MODE>")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
	 (eq_attr "alternative" "0,1")
	 (symbol_ref "TARGET_MIX_SSE_I387
		      && X87_ENABLE_ARITH (<MODE>mode)")
	 (const_string "*"))
       (if_then_else
	 (eq_attr "alternative" "0,1")
	 (symbol_ref "true")
	 (symbol_ref "false"))))])

(define_insn "*fop_<X87MODEF:mode>_2_i387"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
	(match_operator:X87MODEF 3 "binary_fp_operator"
	  [(float:X87MODEF
	     (match_operand:SWI24 1 "nonimmediate_operand" "m"))
	   (match_operand:X87MODEF 2 "register_operand" "0")]))]
  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
       || optimize_function_for_size_p (cfun))"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:X87MODEF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:X87MODEF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "fp_int_src" "true")
   (set_attr "mode" "<SWI24:MODE>")])

(define_insn "*fop_<X87MODEF:mode>_3_i387"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
	(match_operator:X87MODEF 3 "binary_fp_operator"
	  [(match_operand:X87MODEF 1 "register_operand" "0")
	   (float:X87MODEF
	     (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
       || optimize_function_for_size_p (cfun))"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:X87MODEF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:X87MODEF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "fp_int_src" "true")
   (set_attr "mode" "<SWI24:MODE>")])

(define_insn "*fop_xf_4_i387"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
	(match_operator:XF 3 "binary_fp_operator"
	   [(float_extend:XF
	      (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
	    (match_operand:XF 2 "register_operand" "0,f")]))]
  "TARGET_80387"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:XF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:XF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "<MODE>")])

(define_insn "*fop_df_4_i387"
  [(set (match_operand:DF 0 "register_operand" "=f,f")
	(match_operator:DF 3 "binary_fp_operator"
	   [(float_extend:DF
	     (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
	    (match_operand:DF 2 "register_operand" "0,f")]))]
  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:DF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:DF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "SF")])

(define_insn "*fop_xf_5_i387"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
	(match_operator:XF 3 "binary_fp_operator"
	  [(match_operand:XF 1 "register_operand" "0,f")
	   (float_extend:XF
	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
  "TARGET_80387"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:XF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:XF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "<MODE>")])

(define_insn "*fop_df_5_i387"
  [(set (match_operand:DF 0 "register_operand" "=f,f")
	(match_operator:DF 3 "binary_fp_operator"
	  [(match_operand:DF 1 "register_operand" "0,f")
	   (float_extend:DF
	    (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:DF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:DF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "SF")])

(define_insn "*fop_xf_6_i387"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
	(match_operator:XF 3 "binary_fp_operator"
	  [(float_extend:XF
	     (match_operand:MODEF 1 "register_operand" "0,f"))
	   (float_extend:XF
	     (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
  "TARGET_80387"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:XF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:XF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "<MODE>")])

(define_insn "*fop_df_6_i387"
  [(set (match_operand:DF 0 "register_operand" "=f,f")
	(match_operator:DF 3 "binary_fp_operator"
	  [(float_extend:DF
	    (match_operand:SF 1 "register_operand" "0,f"))
	   (float_extend:DF
	    (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
  "* return output_387_binary_op (insn, operands);"
  [(set (attr "type")
	(cond [(match_operand:DF 3 "mult_operator")
		 (const_string "fmul")
	       (match_operand:DF 3 "div_operator")
		 (const_string "fdiv")
	      ]
	      (const_string "fop")))
   (set_attr "mode" "SF")])

;; FPU special functions.

;; This pattern implements a no-op XFmode truncation for
;; all fancy i386 XFmode math functions.

(define_insn "truncxf<mode>2_i387_noop_unspec"
  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
	(unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
	UNSPEC_TRUNC_NOOP))]
  "TARGET_USE_FANCY_MATH_387"
  "* return output_387_reg_move (insn, operands);"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>")])

(define_insn "sqrtxf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
  "TARGET_USE_FANCY_MATH_387"
  "fsqrt"
  [(set_attr "type" "fpspc")
   (set_attr "mode" "XF")
   (set_attr "athlon_decode" "direct")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")])

(define_insn "*rsqrtsf2_sse"
  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
		   UNSPEC_RSQRT))]
  "TARGET_SSE && TARGET_SSE_MATH"
  "@
   %vrsqrtss\t{%d1, %0|%0, %d1}
   %vrsqrtss\t{%d1, %0|%0, %d1}
   %vrsqrtss\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SF")
   (set_attr "avx_partial_xmm_update" "false,false,true")
   (set (attr "preferred_for_speed")
      (cond [(match_test "TARGET_AVX")
	       (symbol_ref "true")
	     (eq_attr "alternative" "1,2")
	       (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
	    ]
	    (symbol_ref "true")))])

(define_expand "rsqrtsf2"
  [(set (match_operand:SF 0 "register_operand")
	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
		   UNSPEC_RSQRT))]
  "TARGET_SSE && TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
  DONE;
})

(define_insn "*sqrt<mode>2_sse"
  [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
	(sqrt:MODEF
	  (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "@
   %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
   %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
   %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "btver2_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "avx_partial_xmm_update" "false,false,true")
   (set_attr "mode" "<MODE>")
   (set (attr "preferred_for_speed")
      (cond [(match_test "TARGET_AVX")
	       (symbol_ref "true")
	     (eq_attr "alternative" "1,2")
	       (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
	    ]
	    (symbol_ref "true")))])

(define_expand "sqrt<mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
	(sqrt:MODEF
	  (match_operand:MODEF 1 "nonimmediate_operand")))]
  "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  if (<MODE>mode == SFmode
      && TARGET_SSE && TARGET_SSE_MATH
      && TARGET_RECIP_SQRT
      && !optimize_function_for_size_p (cfun)
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
      DONE;
    }

  if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
      emit_insn (gen_sqrtxf2 (op0, op1));
      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
      DONE;
   }
})

(define_expand "hypot<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));

  emit_insn (gen_mulxf3 (op1, op1, op1));
  emit_insn (gen_mulxf3 (op2, op2, op2));
  emit_insn (gen_addxf3 (op0, op2, op1));
  emit_insn (gen_sqrtxf2 (op0, op0));

  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "x86_fnstsw_1"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
  "TARGET_80387"
  "fnstsw\t%0"
  [(set_attr "length" "2")
   (set_attr "mode" "SI")
   (set_attr "unit" "i387")])

(define_insn "fpremxf4_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
		    (match_operand:XF 3 "register_operand" "1")]
		   UNSPEC_FPREM_F))
   (set (match_operand:XF 1 "register_operand" "=f")
	(unspec:XF [(match_dup 2) (match_dup 3)]
		   UNSPEC_FPREM_U))
   (set (reg:CCFP FPSR_REG)
	(unspec:CCFP [(match_dup 2) (match_dup 3)]
		     UNSPEC_C2_FLAG))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
  "fprem"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "fmodxf3"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "general_operand"))
   (use (match_operand:XF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
{
  rtx_code_label *label = gen_label_rtx ();

  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_move_insn (op2, operands[2]);
  emit_move_insn (op1, operands[1]);

  emit_label (label);
  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
  ix86_emit_fp_unordered_jump (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operands[0], op1);
  DONE;
})

(define_expand "fmod<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
{
  rtx (*gen_truncxf) (rtx, rtx);

  rtx_code_label *label = gen_label_rtx ();

  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));

  emit_label (label);
  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
  ix86_emit_fp_unordered_jump (label);
  LABEL_NUSES (label) = 1;

  /* Truncate the result properly for strict SSE math.  */
  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
      && !TARGET_MIX_SSE_I387)
    gen_truncxf = gen_truncxf<mode>2;
  else
    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;

  emit_insn (gen_truncxf (operands[0], op1));
  DONE;
})

(define_insn "fprem1xf4_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
		    (match_operand:XF 3 "register_operand" "1")]
		   UNSPEC_FPREM1_F))
   (set (match_operand:XF 1 "register_operand" "=f")
	(unspec:XF [(match_dup 2) (match_dup 3)]
		   UNSPEC_FPREM1_U))
   (set (reg:CCFP FPSR_REG)
	(unspec:CCFP [(match_dup 2) (match_dup 3)]
		     UNSPEC_C2_FLAG))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
  "fprem1"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "remainderxf3"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "general_operand"))
   (use (match_operand:XF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
{
  rtx_code_label *label = gen_label_rtx ();

  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_move_insn (op2, operands[2]);
  emit_move_insn (op1, operands[1]);

  emit_label (label);
  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
  ix86_emit_fp_unordered_jump (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operands[0], op1);
  DONE;
})

(define_expand "remainder<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only"
{
  rtx (*gen_truncxf) (rtx, rtx);

  rtx_code_label *label = gen_label_rtx ();

  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));

  emit_label (label);

  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
  ix86_emit_fp_unordered_jump (label);
  LABEL_NUSES (label) = 1;

  /* Truncate the result properly for strict SSE math.  */
  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
      && !TARGET_MIX_SSE_I387)
    gen_truncxf = gen_truncxf<mode>2;
  else
    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;

  emit_insn (gen_truncxf (operands[0], op1));
  DONE;
})

(define_int_iterator SINCOS
	[UNSPEC_SIN
	 UNSPEC_COS])

(define_int_attr sincos
	[(UNSPEC_SIN "sin")
	 (UNSPEC_COS "cos")])

(define_insn "<sincos>xf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
		   SINCOS))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "f<sincos>"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "<sincos><mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
	(unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
		      SINCOS))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_<sincos>xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "sincosxf3"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
		   UNSPEC_SINCOS_COS))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fsincos"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "sincos<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "register_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_sincosxf3 (op0, op1, op2));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  emit_insn (gen_truncxf<mode>2 (operands[1], op1));
  DONE;
})

(define_insn "fptanxf4_i387"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(match_operand:SF 3 "const1_operand"))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
		   UNSPEC_TAN))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fptan"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "tanxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx one = gen_reg_rtx (SFmode);
  emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1],
				CONST1_RTX (SFmode)));
  DONE;
})

(define_expand "tan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_tanxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "atan2xf3"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
	            (match_operand:XF 2 "register_operand" "f")]
	           UNSPEC_FPATAN))
   (clobber (match_scratch:XF 3 "=2"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fpatan"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "atan2<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));

  emit_insn (gen_atan2xf3 (op0, op2, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "atanxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_dup 2)
			       (match_operand:XF 1 "register_operand")]
			      UNSPEC_FPATAN))
	      (clobber (match_scratch:XF 3))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")

(define_expand "atan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_atanxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "asinxf2"
  [(set (match_dup 2)
	(mult:XF (match_operand:XF 1 "register_operand")
		 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel [(set (match_operand:XF 0 "register_operand")
        	   (unspec:XF [(match_dup 5) (match_dup 1)]
			      UNSPEC_FPATAN))
   	      (clobber (match_scratch:XF 6))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  int i;

  for (i = 2; i < 6; i++)
    operands[i] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})

(define_expand "asin<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_asinxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "acosxf2"
  [(set (match_dup 2)
	(mult:XF (match_operand:XF 1 "register_operand")
		 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel [(set (match_operand:XF 0 "register_operand")
        	   (unspec:XF [(match_dup 1) (match_dup 5)]
			      UNSPEC_FPATAN))
   	      (clobber (match_scratch:XF 6))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  int i;

  for (i = 2; i < 6; i++)
    operands[i] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})

(define_expand "acos<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_acosxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "sinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_sinh (operands[0], operands[1]);
  DONE;
})

(define_expand "sinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_sinhxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "coshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_cosh (operands[0], operands[1]);
  DONE;
})

(define_expand "cosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_coshxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "tanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_tanh (operands[0], operands[1]);
  DONE;
})

(define_expand "tanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_tanhxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "asinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_asinh (operands[0], operands[1]);
  DONE;
})

(define_expand "asinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_asinhxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "acoshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_acosh (operands[0], operands[1]);
  DONE;
})

(define_expand "acosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_acoshxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "atanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_atanh (operands[0], operands[1]);
  DONE;
})

(define_expand "atanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_atanhxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "fyl2xxf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
		    (match_operand:XF 2 "register_operand" "f")]
	           UNSPEC_FYL2X))
   (clobber (match_scratch:XF 3 "=2"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fyl2x"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "logxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")
			       (match_dup 2)] UNSPEC_FYL2X))
	      (clobber (match_scratch:XF 3))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  operands[2]
    = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
})

(define_expand "log<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_logxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "log10xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")
			       (match_dup 2)] UNSPEC_FYL2X))
	      (clobber (match_scratch:XF 3))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  operands[2]
    = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */
})

(define_expand "log10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_log10xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "log2xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")
			       (match_dup 2)] UNSPEC_FYL2X))
	      (clobber (match_scratch:XF 3))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")

(define_expand "log2<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_log2xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "fyl2xp1xf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
		    (match_operand:XF 2 "register_operand" "f")]
	           UNSPEC_FYL2XP1))
   (clobber (match_scratch:XF 3 "=2"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fyl2xp1"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "log1pxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  ix86_emit_i387_log1p (operands[0], operands[1]);
  DONE;
})

(define_expand "log1p<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_log1pxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_insn "fxtractxf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
		   UNSPEC_XTRACT_FRACT))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fxtract"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "logbxf2"
  [(parallel [(set (match_dup 2)
		   (unspec:XF [(match_operand:XF 1 "register_operand")]
			      UNSPEC_XTRACT_FRACT))
	      (set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[2] = gen_reg_rtx (XFmode);")

(define_expand "logb<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_logbxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op1));
  DONE;
})

(define_expand "ilogbxf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx op0, op1;

  if (optimize_insn_for_size_p ())
    FAIL;

  op0 = gen_reg_rtx (XFmode);
  op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
  DONE;
})

(define_expand "ilogb<mode>2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0, op1, op2;

  if (optimize_insn_for_size_p ())
    FAIL;

  op0 = gen_reg_rtx (XFmode);
  op1 = gen_reg_rtx (XFmode);
  op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op2, operands[1]));
  emit_insn (gen_fxtractxf3_i387 (op0, op1, op2));
  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
  DONE;
})

(define_insn "*f2xm1xf2_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
		   UNSPEC_F2XM1))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "f2xm1"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_insn "fscalexf4_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
		    (match_operand:XF 3 "register_operand" "1")]
		   UNSPEC_FSCALE_FRACT))
   (set (match_operand:XF 1 "register_operand" "=f")
	(unspec:XF [(match_dup 2) (match_dup 3)]
		   UNSPEC_FSCALE_EXP))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "fscale"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "expNcorexf3"
  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
			       (match_operand:XF 2 "register_operand")))
   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
   (parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_dup 8) (match_dup 4)]
			      UNSPEC_FSCALE_FRACT))
	      (set (match_dup 9)
		   (unspec:XF [(match_dup 8) (match_dup 4)]
			      UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  int i;

  for (i = 3; i < 10; i++)
    operands[i] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[7], CONST1_RTX (XFmode));
})

(define_expand "expxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  DONE;
})

(define_expand "exp<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_expxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "exp10xf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  DONE;
})

(define_expand "exp10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_exp10xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "exp2xf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode));

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  DONE;
})

(define_expand "exp2<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_exp2xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "expm1xf2"
  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
			       (match_dup 2)))
   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
   (parallel [(set (match_dup 7)
		   (unspec:XF [(match_dup 6) (match_dup 4)]
			      UNSPEC_FSCALE_FRACT))
	      (set (match_dup 8)
		   (unspec:XF [(match_dup 6) (match_dup 4)]
			      UNSPEC_FSCALE_EXP))])
   (parallel [(set (match_dup 10)
		   (unspec:XF [(match_dup 9) (match_dup 8)]
			      UNSPEC_FSCALE_FRACT))
	      (set (match_dup 11)
		   (unspec:XF [(match_dup 9) (match_dup 8)]
			      UNSPEC_FSCALE_EXP))])
   (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
   (set (match_operand:XF 0 "register_operand")
	(plus:XF (match_dup 12) (match_dup 7)))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  int i;

  for (i = 2; i < 13; i++)
    operands[i] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
  emit_move_insn (operands[9], CONST1_RTX (XFmode));
})

(define_expand "expm1<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_expm1xf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "ldexpxf3"
  [(match_operand:XF 0 "register_operand")
   (match_operand:XF 1 "register_operand")
   (match_operand:SI 2 "register_operand")]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx tmp1 = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_floatsixf2 (tmp1, operands[2]));
  emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
                                 operands[1], tmp1));
  DONE;
})

(define_expand "ldexp<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:SI 2 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "scalbxf3"
  [(parallel [(set (match_operand:XF 0 " register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")
			       (match_operand:XF 2 "register_operand")]
			      UNSPEC_FSCALE_FRACT))
	      (set (match_dup 3)
		   (unspec:XF [(match_dup 1) (match_dup 2)]
			      UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[3] = gen_reg_rtx (XFmode);")

(define_expand "scalb<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
  emit_insn (gen_scalbxf3 (op0, op1, op2));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})

(define_expand "significandxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")]
			      UNSPEC_XTRACT_FRACT))
	      (set (match_dup 2)
		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[2] = gen_reg_rtx (XFmode);")

(define_expand "significand<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_significandxf2 (op0, op1));
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})


(define_insn "sse4_1_round<mode>2"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v,v")
	(unspec:MODEF
	  [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,v,m")
	   (match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n,n")]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "@
   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
   vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1,1,1,*,*")
   (set_attr "length_immediate" "*,*,*,1,1")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
   (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
   (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
   (set_attr "mode" "<MODE>")
   (set (attr "preferred_for_speed")
      (cond [(match_test "TARGET_AVX")
	       (symbol_ref "true")
	     (eq_attr "alternative" "1,2")
	       (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
	    ]
	    (symbol_ref "true")))])

(define_insn "rintxf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
		   UNSPEC_FRNDINT))]
  "TARGET_USE_FANCY_MATH_387"
  "frndint"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

(define_expand "rint<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
  "TARGET_USE_FANCY_MATH_387
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    {
      if (TARGET_SSE4_1)
	emit_insn (gen_sse4_1_round<mode>2
		   (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
      else
	ix86_expand_rint (operands[0], operands[1]);
    }
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
      emit_insn (gen_rintxf2 (op0, op1));
      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

(define_expand "nearbyintxf2"
  [(set (match_operand:XF 0 "register_operand")
	(unspec:XF [(match_operand:XF 1 "register_operand")]
		   UNSPEC_FRNDINT))]
  "TARGET_USE_FANCY_MATH_387
   && !flag_trapping_math")

(define_expand "nearbyint<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
  "(TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
	  || TARGET_MIX_SSE_I387)
    && !flag_trapping_math)
   || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
{
  if (TARGET_SSE4_1 && TARGET_SSE_MATH)
    emit_insn (gen_sse4_1_round<mode>2
	       (operands[0], operands[1], GEN_INT (ROUND_MXCSR
						   | ROUND_NO_EXC)));
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
      emit_insn (gen_nearbyintxf2 (op0, op1));
      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

(define_expand "round<mode>2"
  [(match_operand:X87MODEF 0 "register_operand")
   (match_operand:X87MODEF 1 "nonimmediate_operand")]
  "(TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
	|| TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations
    && (flag_fp_int_builtin_inexact || !flag_trapping_math))
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math && !flag_rounding_math)"
{
  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
      && !flag_trapping_math && !flag_rounding_math)
    {
      if (TARGET_SSE4_1)
        {
	  operands[1] = force_reg (<MODE>mode, operands[1]);
	  ix86_expand_round_sse4 (operands[0], operands[1]);
	}
      else if (TARGET_64BIT || (<MODE>mode != DFmode))
	ix86_expand_round (operands[0], operands[1]);
      else
	ix86_expand_rounddf_32 (operands[0], operands[1]);
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);
      ix86_emit_i387_round (operands[0], operands[1]);
    }
  DONE;
})

(define_insn "lrintxfdi2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
		   UNSPEC_FIST))
   (clobber (match_scratch:XF 2 "=&f"))]
  "TARGET_USE_FANCY_MATH_387"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fpspc")
   (set_attr "mode" "DI")])

(define_insn "lrintxf<mode>2"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
		      UNSPEC_FIST))]
  "TARGET_USE_FANCY_MATH_387"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fpspc")
   (set_attr "mode" "<MODE>")])

(define_expand "lrint<MODEF:mode><SWI48:mode>2"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
     (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
		   UNSPEC_FIX_NOTRUNC))]
  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")

(define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
  [(match_operand:SWI248x 0 "nonimmediate_operand")
   (match_operand:X87MODEF 1 "register_operand")]
  "(TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
	|| TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations)
   || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
       && <SWI248x:MODE>mode != HImode 
       && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
       && !flag_trapping_math && !flag_rounding_math)"
{
  if (optimize_insn_for_size_p ())
    FAIL;

  if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
      && <SWI248x:MODE>mode != HImode
      && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
      && !flag_trapping_math && !flag_rounding_math)
    ix86_expand_lround (operands[0], operands[1]);
  else
    ix86_emit_i387_round (operands[0], operands[1]);
  DONE;
})

(define_int_iterator FRNDINT_ROUNDING
	[UNSPEC_FRNDINT_ROUNDEVEN
	 UNSPEC_FRNDINT_FLOOR
	 UNSPEC_FRNDINT_CEIL
	 UNSPEC_FRNDINT_TRUNC])

(define_int_iterator FIST_ROUNDING
	[UNSPEC_FIST_FLOOR
	 UNSPEC_FIST_CEIL])

;; Base name for define_insn
(define_int_attr rounding_insn
	[(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
	 (UNSPEC_FRNDINT_FLOOR "floor")
	 (UNSPEC_FRNDINT_CEIL "ceil")
	 (UNSPEC_FRNDINT_TRUNC "btrunc")
	 (UNSPEC_FIST_FLOOR "floor")
	 (UNSPEC_FIST_CEIL "ceil")])

(define_int_attr rounding
	[(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
	 (UNSPEC_FRNDINT_FLOOR "floor")
	 (UNSPEC_FRNDINT_CEIL "ceil")
	 (UNSPEC_FRNDINT_TRUNC "trunc")
	 (UNSPEC_FIST_FLOOR "floor")
	 (UNSPEC_FIST_CEIL "ceil")])

(define_int_attr ROUNDING
	[(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
	 (UNSPEC_FRNDINT_FLOOR "FLOOR")
	 (UNSPEC_FRNDINT_CEIL "CEIL")
	 (UNSPEC_FRNDINT_TRUNC "TRUNC")
	 (UNSPEC_FIST_FLOOR "FLOOR")
	 (UNSPEC_FIST_CEIL "CEIL")])

;; Rounding mode control word calculation could clobber FLAGS_REG.
(define_insn_and_split "frndintxf2_<rounding>"
  [(set (match_operand:XF 0 "register_operand")
	(unspec:XF [(match_operand:XF 1 "register_operand")]
		   FRNDINT_ROUNDING))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_FANCY_MATH_387
   && (flag_fp_int_builtin_inexact || !flag_trapping_math)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;

  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);

  emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
					     operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "frndint")
   (set_attr "i387_cw" "<rounding>")
   (set_attr "mode" "XF")])

(define_insn "frndintxf2_<rounding>_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
		   FRNDINT_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "TARGET_USE_FANCY_MATH_387
   && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
  [(set_attr "type" "frndint")
   (set_attr "i387_cw" "<rounding>")
   (set_attr "mode" "XF")])

(define_expand "<rounding_insn>xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
		   (unspec:XF [(match_operand:XF 1 "register_operand")]
			      FRNDINT_ROUNDING))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_USE_FANCY_MATH_387
   && (flag_fp_int_builtin_inexact || !flag_trapping_math)")

(define_expand "<rounding_insn><mode>2"
  [(parallel [(set (match_operand:MODEF 0 "register_operand")
		   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
				 FRNDINT_ROUNDING))
	      (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
	|| TARGET_MIX_SSE_I387)
    && (flag_fp_int_builtin_inexact || !flag_trapping_math))
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && (TARGET_SSE4_1
	  || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
	      && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
{
  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
      && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
    {
      if (TARGET_SSE4_1)
	emit_insn (gen_sse4_1_round<mode>2
		   (operands[0], operands[1], GEN_INT (ROUND_<ROUNDING>
						       | ROUND_NO_EXC)));
      else if (TARGET_64BIT || (<MODE>mode != DFmode))
	{
	  if (ROUND_<ROUNDING> == ROUND_FLOOR)
	    ix86_expand_floorceil (operands[0], operands[1], true);
	  else if (ROUND_<ROUNDING> == ROUND_CEIL)
	    ix86_expand_floorceil (operands[0], operands[1], false);
	  else if (ROUND_<ROUNDING> == ROUND_TRUNC)
	    ix86_expand_trunc (operands[0], operands[1]);
	  else
	    gcc_unreachable ();
	}
      else
	{
	  if (ROUND_<ROUNDING> == ROUND_FLOOR)
	    ix86_expand_floorceildf_32 (operands[0], operands[1], true);
	  else if (ROUND_<ROUNDING> == ROUND_CEIL)
	    ix86_expand_floorceildf_32 (operands[0], operands[1], false);
	  else if (ROUND_<ROUNDING> == ROUND_TRUNC)
	    ix86_expand_truncdf_32 (operands[0], operands[1]);
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
      emit_insn (gen_frndintxf2_<rounding> (op0, op1));
      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

;; Rounding mode control word calculation could clobber FLAGS_REG.
(define_insn_and_split "*fist<mode>2_<rounding>_1"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
	(unspec:SWI248x [(match_operand:XF 1 "register_operand")]
			FIST_ROUNDING))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;

  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);

  emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
					 operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "<rounding>")
   (set_attr "mode" "<MODE>")])

(define_insn "fistdi2_<rounding>"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
		   FIST_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))
   (clobber (match_scratch:XF 4 "=&f"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "<rounding>")
   (set_attr "mode" "DI")])

(define_insn "fist<mode>2_<rounding>"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
	(unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
		      FIST_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "<rounding>")
   (set_attr "mode" "<MODE>")])

(define_expand "l<rounding_insn>xf<mode>2"
  [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
		   (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
				   FIST_ROUNDING))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_USE_FANCY_MATH_387
   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations")

(define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
  [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
		   (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
				 FIST_ROUNDING))
	      (clobber (reg:CC FLAGS_REG))])]
  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
   && (TARGET_SSE4_1 || !flag_trapping_math)"
{
  if (TARGET_SSE4_1)
    {
      rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);

      emit_insn (gen_sse4_1_round<MODEF:mode>2
		 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
					     | ROUND_NO_EXC)));
      emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
		 (operands[0], tmp));
    }
  else if (ROUND_<ROUNDING> == ROUND_FLOOR)
    ix86_expand_lfloorceil (operands[0], operands[1], true);
  else if (ROUND_<ROUNDING> == ROUND_CEIL)
    ix86_expand_lfloorceil (operands[0], operands[1], false);
  else
    gcc_unreachable ();

  DONE;
})

(define_insn "fxam<mode>2_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
	(unspec:HI
	  [(match_operand:X87MODEF 1 "register_operand" "f")]
	  UNSPEC_FXAM))]
  "TARGET_USE_FANCY_MATH_387"
  "fxam\n\tfnstsw\t%0"
  [(set_attr "type" "multi")
   (set_attr "length" "4")
   (set_attr "unit" "i387")
   (set_attr "mode" "<MODE>")])

(define_expand "signbittf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:TF 1 "register_operand"))]
  "TARGET_SSE"
{
  if (TARGET_SSE4_1)
    {
      rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
      rtx scratch = gen_reg_rtx (QImode);

      emit_insn (gen_ptesttf2 (operands[1], mask));
	ix86_expand_setcc (scratch, NE,
			   gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);

      emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
    }
  else
    {
      emit_insn (gen_sse_movmskps (operands[0],
				   gen_lowpart (V4SFmode, operands[1])));
      emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
    }
  DONE;
})

(define_expand "signbitxf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx scratch = gen_reg_rtx (HImode);

  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
  emit_insn (gen_andsi3 (operands[0],
	     gen_lowpart (SImode, scratch), GEN_INT (0x200)));
  DONE;
})

(define_insn "movmsk_df"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:DF 1 "register_operand" "x")]
	  UNSPEC_MOVMSK))]
  "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
  "%vmovmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])

;; Use movmskpd in SSE mode to avoid store forwarding stall
;; for 32bit targets and movq+shrq sequence for 64bit targets.
(define_expand "signbitdf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:DF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
    {
      emit_insn (gen_movmsk_df (operands[0], operands[1]));
      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
    }
  else
    {
      rtx scratch = gen_reg_rtx (HImode);

      emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
      emit_insn (gen_andsi3 (operands[0],
		 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
    }
  DONE;
})

(define_expand "signbitsf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:SF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
  rtx scratch = gen_reg_rtx (HImode);

  emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
  emit_insn (gen_andsi3 (operands[0],
	     gen_lowpart (SImode, scratch), GEN_INT (0x200)));
  DONE;
})

;; Block operation instructions

(define_insn "cld"
  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
  ""
  "cld"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

(define_expand "cpymem<mode>"
  [(use (match_operand:BLK 0 "memory_operand"))
   (use (match_operand:BLK 1 "memory_operand"))
   (use (match_operand:SWI48 2 "nonmemory_operand"))
   (use (match_operand:SWI48 3 "const_int_operand"))
   (use (match_operand:SI 4 "const_int_operand"))
   (use (match_operand:SI 5 "const_int_operand"))
   (use (match_operand:SI 6 ""))
   (use (match_operand:SI 7 ""))
   (use (match_operand:SI 8 ""))]
  ""
{
 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
			        operands[2], NULL, operands[3],
			        operands[4], operands[5],
				operands[6], operands[7],
				operands[8], false))
   DONE;
 else
   FAIL;
})

;; Most CPUs don't like single string operations
;; Handle this case here to simplify previous expander.

(define_expand "strmov"
  [(set (match_dup 4) (match_operand 3 "memory_operand"))
   (set (match_operand 1 "memory_operand") (match_dup 4))
   (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* Can't use this for non-default address spaces.  */
  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
    FAIL;

  rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));

  /* If .md ever supports :P for Pmode, these can be directly
     in the pattern above.  */
  operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
  operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);

  /* Can't use this if the user has appropriated esi or edi.  */
  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
      && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
    {
      emit_insn (gen_strmov_singleop (operands[0], operands[1],
				      operands[2], operands[3],
				      operands[5], operands[6]));
      DONE;
    }

  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
})

(define_expand "strmov_singleop"
  [(parallel [(set (match_operand 1 "memory_operand")
		   (match_operand 3 "memory_operand"))
	      (set (match_operand 0 "register_operand")
		   (match_operand 4))
	      (set (match_operand 2 "register_operand")
		   (match_operand 5))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*strmovdi_rex_1"
  [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
	(mem:DI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 2)
		(const_int 8)))
   (set (match_operand:P 1 "register_operand" "=S")
	(plus:P (match_dup 3)
		(const_int 8)))]
  "TARGET_64BIT
   && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^movsq"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "DI")])

(define_insn "*strmovsi_1"
  [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
	(mem:SI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 2)
		(const_int 4)))
   (set (match_operand:P 1 "register_operand" "=S")
	(plus:P (match_dup 3)
		(const_int 4)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^movs{l|d}"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "SI")])

(define_insn "*strmovhi_1"
  [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
	(mem:HI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 2)
		(const_int 2)))
   (set (match_operand:P 1 "register_operand" "=S")
	(plus:P (match_dup 3)
		(const_int 2)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^movsw"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "HI")])

(define_insn "*strmovqi_1"
  [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
	(mem:QI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 2)
		(const_int 1)))
   (set (match_operand:P 1 "register_operand" "=S")
	(plus:P (match_dup 3)
		(const_int 1)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^movsb"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "mode" "QI")])

(define_expand "rep_mov"
  [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
	      (set (match_operand 0 "register_operand")
		   (match_operand 5))
	      (set (match_operand 2 "register_operand")
		   (match_operand 6))
	      (set (match_operand 1 "memory_operand")
		   (match_operand 3 "memory_operand"))
	      (use (match_dup 4))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*rep_movdi_rex64"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
			  (const_int 3))
		(match_operand:P 3 "register_operand" "0")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (ashift:P (match_dup 5) (const_int 3))
		(match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
	(mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "TARGET_64BIT
   && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movsq"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "DI")])

(define_insn "*rep_movsi"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
			  (const_int 2))
		 (match_operand:P 3 "register_operand" "0")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (ashift:P (match_dup 5) (const_int 2))
		(match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
	(mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movs{l|d}"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "SI")])

(define_insn "*rep_movqi"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_operand:P 3 "register_operand" "0")
		(match_operand:P 5 "register_operand" "2")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
   (set (mem:BLK (match_dup 3))
	(mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movsb"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "QI")])

(define_expand "setmem<mode>"
   [(use (match_operand:BLK 0 "memory_operand"))
    (use (match_operand:SWI48 1 "nonmemory_operand"))
    (use (match_operand:QI 2 "nonmemory_operand"))
    (use (match_operand 3 "const_int_operand"))
    (use (match_operand:SI 4 "const_int_operand"))
    (use (match_operand:SI 5 "const_int_operand"))
    (use (match_operand:SI 6 ""))
    (use (match_operand:SI 7 ""))
    (use (match_operand:SI 8 ""))]
  ""
{
 if (ix86_expand_set_or_cpymem (operands[0], NULL,
			        operands[1], operands[2],
				operands[3], operands[4],
			        operands[5], operands[6],
				operands[7], operands[8], true))
   DONE;
 else
   FAIL;
})

;; Most CPUs don't like single string operations
;; Handle this case here to simplify previous expander.

(define_expand "strset"
  [(set (match_operand 1 "memory_operand")
	(match_operand 2 "register_operand"))
   (parallel [(set (match_operand 0 "register_operand")
		   (match_dup 3))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* Can't use this for non-default address spaces.  */
  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
    FAIL;

  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);

  /* If .md ever supports :P for Pmode, this can be directly
     in the pattern above.  */
  operands[3] = gen_rtx_PLUS (Pmode, operands[0],
			      GEN_INT (GET_MODE_SIZE (GET_MODE
						      (operands[2]))));
  /* Can't use this if the user has appropriated eax or edi.  */
  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
      && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
    {
      emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
				      operands[3]));
      DONE;
    }
})

(define_expand "strset_singleop"
  [(parallel [(set (match_operand 1 "memory_operand")
		   (match_operand 2 "register_operand"))
	      (set (match_operand 0 "register_operand")
		   (match_operand 3))
	      (unspec [(const_int 0)] UNSPEC_STOS)])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*strsetdi_rex_1"
  [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
	(match_operand:DI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 1)
		(const_int 8)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "TARGET_64BIT
   && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^stosq"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "DI")])

(define_insn "*strsetsi_1"
  [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
	(match_operand:SI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 1)
		(const_int 4)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^stos{l|d}"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "SI")])

(define_insn "*strsethi_1"
  [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
	(match_operand:HI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 1)
		(const_int 2)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^stosw"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "HI")])

(define_insn "*strsetqi_1"
  [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
	(match_operand:QI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
	(plus:P (match_dup 1)
		(const_int 1)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^stosb"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "mode" "QI")])

(define_expand "rep_stos"
  [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
	      (set (match_operand 0 "register_operand")
		   (match_operand 4))
	      (set (match_operand 2 "memory_operand") (const_int 0))
	      (use (match_operand 3 "register_operand"))
	      (use (match_dup 1))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*rep_stosdi_rex64"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
			  (const_int 3))
		 (match_operand:P 3 "register_operand" "0")))
   (set (mem:BLK (match_dup 3))
	(const_int 0))
   (use (match_operand:DI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "TARGET_64BIT
   && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stosq"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set_attr "mode" "DI")])

(define_insn "*rep_stossi"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
			  (const_int 2))
		 (match_operand:P 3 "register_operand" "0")))
   (set (mem:BLK (match_dup 3))
	(const_int 0))
   (use (match_operand:SI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stos{l|d}"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set_attr "mode" "SI")])

(define_insn "*rep_stosqi"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_operand:P 3 "register_operand" "0")
		(match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
	(const_int 0))
   (use (match_operand:QI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stosb"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "mode" "QI")])

(define_expand "cmpstrnsi"
  [(set (match_operand:SI 0 "register_operand")
	(compare:SI (match_operand:BLK 1 "general_operand")
		    (match_operand:BLK 2 "general_operand")))
   (use (match_operand 3 "general_operand"))
   (use (match_operand 4 "immediate_operand"))]
  ""
{
  rtx addr1, addr2, countreg, align, out;

  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
    FAIL;

  /* Can't use this if the user has appropriated ecx, esi or edi.  */
  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
    FAIL;

  /* One of the strings must be a constant.  If so, expand_builtin_strncmp()
     will have rewritten the length arg to be the minimum of the const string
     length and the actual length arg.  If both strings are the same and
     shorter than the length arg, repz cmpsb will not stop at the 0 byte and
     will incorrectly base the results on chars past the 0 byte.  */
  tree t1 = MEM_EXPR (operands[1]);
  tree t2 = MEM_EXPR (operands[2]);
  if (!((t1 && TREE_CODE (t1) == MEM_REF
         && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
         && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST)
      || (t2 && TREE_CODE (t2) == MEM_REF
          && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
          && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST)))
    FAIL;

  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
  if (addr1 != XEXP (operands[1], 0))
    operands[1] = replace_equiv_address_nv (operands[1], addr1);
  if (addr2 != XEXP (operands[2], 0))
    operands[2] = replace_equiv_address_nv (operands[2], addr2);

  countreg = ix86_zero_extend_to_Pmode (operands[3]);

  /* %%% Iff we are testing strict equality, we can use known alignment
     to good advantage.  This may be possible with combine, particularly
     once cc0 is dead.  */
  align = operands[4];

  if (CONST_INT_P (operands[3]))
    {
      if (operands[3] == const0_rtx)
	{
	  emit_move_insn (operands[0], const0_rtx);
	  DONE;
	}
      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
				     operands[1], operands[2]));
    }
  else
    {
      emit_insn (gen_cmp_1 (Pmode, countreg, countreg));
      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
				  operands[1], operands[2]));
    }

  out = gen_lowpart (QImode, operands[0]);
  emit_insn (gen_cmpintqi (out));
  emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out));

  DONE;
})

;; Produce a tri-state integer (-1, 0, 1) from condition codes.

(define_expand "cmpintqi"
  [(set (match_dup 1)
	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_dup 2)
	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (parallel [(set (match_operand:QI 0 "register_operand")
		   (minus:QI (match_dup 1)
			     (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  operands[1] = gen_reg_rtx (QImode);
  operands[2] = gen_reg_rtx (QImode);
})

;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
;; zero.  Emit extra code to make sure that a zero-length compare is EQ.

(define_expand "cmpstrnqi_nz_1"
  [(parallel [(set (reg:CC FLAGS_REG)
		   (compare:CC (match_operand 4 "memory_operand")
			       (match_operand 5 "memory_operand")))
	      (use (match_operand 2 "register_operand"))
	      (use (match_operand:SI 3 "immediate_operand"))
	      (clobber (match_operand 0 "register_operand"))
	      (clobber (match_operand 1 "register_operand"))
	      (clobber (match_dup 2))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*cmpstrnqi_nz_1"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
		    (mem:BLK (match_operand:P 5 "register_operand" "1"))))
   (use (match_operand:P 6 "register_operand" "2"))
   (use (match_operand:SI 3 "immediate_operand" "i"))
   (clobber (match_operand:P 0 "register_operand" "=S"))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (match_operand:P 2 "register_operand" "=c"))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repz{%;} cmpsb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "prefix_rep" "1")])

;; The same, but the count is not known to not be zero.

(define_expand "cmpstrnqi_1"
  [(parallel [(set (reg:CC FLAGS_REG)
		(if_then_else:CC (ne (match_operand 2 "register_operand")
				     (const_int 0))
		  (compare:CC (match_operand 4 "memory_operand")
			      (match_operand 5 "memory_operand"))
		  (const_int 0)))
	      (use (match_operand:SI 3 "immediate_operand"))
	      (use (reg:CC FLAGS_REG))
	      (clobber (match_operand 0 "register_operand"))
	      (clobber (match_operand 1 "register_operand"))
	      (clobber (match_dup 2))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*cmpstrnqi_1"
  [(set (reg:CC FLAGS_REG)
	(if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
			     (const_int 0))
	  (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
		      (mem:BLK (match_operand:P 5 "register_operand" "1")))
	  (const_int 0)))
   (use (match_operand:SI 3 "immediate_operand" "i"))
   (use (reg:CC FLAGS_REG))
   (clobber (match_operand:P 0 "register_operand" "=S"))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (match_operand:P 2 "register_operand" "=c"))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repz{%;} cmpsb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "prefix_rep" "1")])

(define_expand "strlen<mode>"
  [(set (match_operand:P 0 "register_operand")
	(unspec:P [(match_operand:BLK 1 "general_operand")
		   (match_operand:QI 2 "immediate_operand")
		   (match_operand 3 "immediate_operand")]
		  UNSPEC_SCAS))]
  ""
{
 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
   DONE;
 else
   FAIL;
})

(define_expand "strlenqi_1"
  [(parallel [(set (match_operand 0 "register_operand")
		   (match_operand 2))
	      (clobber (match_operand 1 "register_operand"))
	      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*strlenqi_1"
  [(set (match_operand:P 0 "register_operand" "=&c")
	(unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
		   (match_operand:QI 2 "register_operand" "a")
		   (match_operand:P 3 "immediate_operand" "i")
		   (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (reg:CC FLAGS_REG))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repnz{%;} scasb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
	(if_then_else
	  (match_test "<P:MODE>mode == DImode")
	  (const_string "0")
	  (const_string "*")))
   (set_attr "prefix_rep" "1")])

;; Peephole optimizations to clean up after cmpstrn*.  This should be
;; handled in combine, but it is not currently up to the task.
;; When used for their truth value, the cmpstrn* expanders generate
;; code like this:
;;
;;   repz cmpsb
;;   seta 	%al
;;   setb 	%dl
;;   cmpb 	%al, %dl
;;   jcc	label
;;
;; The intermediate three instructions are unnecessary.

;; This one handles cmpstrn*_nz_1...
(define_peephole2
  [(parallel[
     (set (reg:CC FLAGS_REG)
	  (compare:CC (mem:BLK (match_operand 4 "register_operand"))
		      (mem:BLK (match_operand 5 "register_operand"))))
     (use (match_operand 6 "register_operand"))
     (use (match_operand:SI 3 "immediate_operand"))
     (clobber (match_operand 0 "register_operand"))
     (clobber (match_operand 1 "register_operand"))
     (clobber (match_operand 2 "register_operand"))])
   (set (match_operand:QI 7 "register_operand")
	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_operand:QI 8 "register_operand")
	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (reg FLAGS_REG)
	(compare (match_dup 7) (match_dup 8)))
  ]
  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
  [(parallel[
     (set (reg:CC FLAGS_REG)
	  (compare:CC (mem:BLK (match_dup 4))
		      (mem:BLK (match_dup 5))))
     (use (match_dup 6))
     (use (match_dup 3))
     (clobber (match_dup 0))
     (clobber (match_dup 1))
     (clobber (match_dup 2))])])

;; ...and this one handles cmpstrn*_1.
(define_peephole2
  [(parallel[
     (set (reg:CC FLAGS_REG)
	  (if_then_else:CC (ne (match_operand 6 "register_operand")
			       (const_int 0))
	    (compare:CC (mem:BLK (match_operand 4 "register_operand"))
		        (mem:BLK (match_operand 5 "register_operand")))
	    (const_int 0)))
     (use (match_operand:SI 3 "immediate_operand"))
     (use (reg:CC FLAGS_REG))
     (clobber (match_operand 0 "register_operand"))
     (clobber (match_operand 1 "register_operand"))
     (clobber (match_operand 2 "register_operand"))])
   (set (match_operand:QI 7 "register_operand")
	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_operand:QI 8 "register_operand")
	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (reg FLAGS_REG)
	(compare (match_dup 7) (match_dup 8)))
  ]
  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
  [(parallel[
     (set (reg:CC FLAGS_REG)
	  (if_then_else:CC (ne (match_dup 6)
			       (const_int 0))
	    (compare:CC (mem:BLK (match_dup 4))
			(mem:BLK (match_dup 5)))
	    (const_int 0)))
     (use (match_dup 3))
     (use (reg:CC FLAGS_REG))
     (clobber (match_dup 0))
     (clobber (match_dup 1))
     (clobber (match_dup 2))])])

;; Conditional move instructions.

(define_expand "mov<mode>cc"
  [(set (match_operand:SWIM 0 "register_operand")
	(if_then_else:SWIM (match_operand 1 "comparison_operator")
			   (match_operand:SWIM 2 "<general_operand>")
			   (match_operand:SWIM 3 "<general_operand>")))]
  ""
  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")

;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
;; the register first winds up with `sbbl $0,reg', which is also weird.
;; So just document what we're doing explicitly.

(define_expand "x86_mov<mode>cc_0_m1"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
	  (if_then_else:SWI48
	    (match_operator:SWI48 2 "ix86_carry_flag_operator"
	     [(match_operand 1 "flags_reg_operand")
	      (const_int 0)])
	    (const_int -1)
	    (const_int 0)))
     (clobber (reg:CC FLAGS_REG))])])

(define_insn "*x86_mov<mode>cc_0_m1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
			     [(reg FLAGS_REG) (const_int 0)])
	  (const_int -1)
	  (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

(define_insn "*x86_mov<mode>cc_0_m1_se"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
			     [(reg FLAGS_REG) (const_int 0)])
			    (const_int 1)
			    (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

(define_insn "*x86_mov<mode>cc_0_m1_neg"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(neg:SWI48 (match_operator 1 "ix86_carry_flag_operator"
		    [(reg FLAGS_REG) (const_int 0)])))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

(define_insn_and_split "*x86_mov<SWI48:mode>cc_0_m1_neg_leu<SWI:mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(neg:SWI48
	  (leu:SWI48
	    (match_operand:SWI 1 "nonimmediate_operand" "<SWI:r>m")
	    (match_operand:SWI 2 "<SWI:immediate_operand>" "<SWI:i>"))))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && INTVAL (operands[2]) != -1
   && INTVAL (operands[2]) != 2147483647"
  "#"
  ""
  [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
   (parallel [(set (match_dup 0)
		   (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")

(define_insn "*mov<mode>cc_noc"
  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
			       [(reg FLAGS_REG) (const_int 0)])
	  (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
	  (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
  "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}"
  [(set_attr "type" "icmov")
   (set_attr "mode" "<MODE>")])

(define_insn "*movsicc_noc_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(if_then_else:DI (match_operator 1 "ix86_comparison_operator"
			   [(reg FLAGS_REG) (const_int 0)])
	  (zero_extend:DI
	    (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
	  (zero_extend:DI
	    (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
  "TARGET_64BIT
   && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   cmov%O2%C1\t{%2, %k0|%k0, %2}
   cmov%O2%c1\t{%3, %k0|%k0, %3}"
  [(set_attr "type" "icmov")
   (set_attr "mode" "SI")])

;; Don't do conditional moves with memory inputs.  This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
			       [(reg FLAGS_REG) (const_int 0)])
	  (match_operand:SWI248 2 "nonimmediate_operand")
	  (match_operand:SWI248 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && can_create_pseudo_p ()
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 0)
	(if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  if (MEM_P (operands[2]))
    operands[2] = force_reg (<MODE>mode, operands[2]);
  if (MEM_P (operands[3]))
    operands[3] = force_reg (<MODE>mode, operands[3]);
})

(define_insn "*movqicc_noc"
  [(set (match_operand:QI 0 "register_operand" "=r,r")
	(if_then_else:QI (match_operator 1 "ix86_comparison_operator"
			   [(reg FLAGS_REG) (const_int 0)])
		      (match_operand:QI 2 "register_operand" "r,0")
		      (match_operand:QI 3 "register_operand" "0,r")))]
  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
  "#"
  [(set_attr "type" "icmov")
   (set_attr "mode" "QI")])

(define_split
  [(set (match_operand:SWI12 0 "register_operand")
	(if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
			      [(reg FLAGS_REG) (const_int 0)])
		      (match_operand:SWI12 2 "register_operand")
		      (match_operand:SWI12 3 "register_operand")))]
  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
   && reload_completed"
  [(set (match_dup 0)
	(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = gen_lowpart (SImode, operands[3]);
})

;; Don't do conditional moves with memory inputs
(define_peephole2
  [(match_scratch:SWI248 4 "r")
   (set (match_operand:SWI248 0 "register_operand")
	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
			       [(reg FLAGS_REG) (const_int 0)])
	  (match_operand:SWI248 2 "nonimmediate_operand")
	  (match_operand:SWI248 3 "nonimmediate_operand")))]
  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
	(if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

(define_peephole2
  [(match_scratch:SI 4 "r")
   (set (match_operand:DI 0 "register_operand")
	(if_then_else:DI (match_operator 1 "ix86_comparison_operator"
			   [(reg FLAGS_REG) (const_int 0)])
	  (zero_extend:DI
	    (match_operand:SI 2 "nonimmediate_operand"))
	  (zero_extend:DI
	    (match_operand:SI 3 "nonimmediate_operand"))))]
  "TARGET_64BIT
   && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
	(if_then_else:DI (match_dup 1)
	  (zero_extend:DI (match_dup 2))
	  (zero_extend:DI (match_dup 3))))]
{
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

(define_expand "mov<mode>cc"
  [(set (match_operand:X87MODEF 0 "register_operand")
	(if_then_else:X87MODEF
	  (match_operand 1 "comparison_operator")
	  (match_operand:X87MODEF 2 "register_operand")
	  (match_operand:X87MODEF 3 "register_operand")))]
  "(TARGET_80387 && TARGET_CMOVE)
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")

(define_insn "*movxfcc_1"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
	(if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
				[(reg FLAGS_REG) (const_int 0)])
		      (match_operand:XF 2 "register_operand" "f,0")
		      (match_operand:XF 3 "register_operand" "0,f")))]
  "TARGET_80387 && TARGET_CMOVE"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}"
  [(set_attr "type" "fcmov")
   (set_attr "mode" "XF")])

(define_insn "*movdfcc_1"
  [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
				[(reg FLAGS_REG) (const_int 0)])
		      (match_operand:DF 2 "nonimmediate_operand"
					       "f ,0,rm,0 ,rm,0")
		      (match_operand:DF 3 "nonimmediate_operand"
					       "0 ,f,0 ,rm,0, rm")))]
  "TARGET_80387 && TARGET_CMOVE
   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}
   #
   #
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}"
  [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
   (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
   (set_attr "mode" "DF,DF,DI,DI,DI,DI")])

(define_split
  [(set (match_operand:DF 0 "general_reg_operand")
	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
				[(reg FLAGS_REG) (const_int 0)])
		      (match_operand:DF 2 "nonimmediate_operand")
		      (match_operand:DF 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && reload_completed"
  [(set (match_dup 2)
	(if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
   (set (match_dup 3)
	(if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
{
  split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
})

(define_insn "*movsfcc_1_387"
  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
				[(reg FLAGS_REG) (const_int 0)])
		      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
		      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
  "TARGET_80387 && TARGET_CMOVE
   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}"
  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
   (set_attr "mode" "SF,SF,SI,SI")])

;; Don't do conditional moves with memory inputs.  This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
  [(set (match_operand:MODEF 0 "register_operand")
	(if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
			      [(reg FLAGS_REG) (const_int 0)])
	  (match_operand:MODEF 2 "nonimmediate_operand")
	  (match_operand:MODEF 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && can_create_pseudo_p ()
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 0)
	(if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  if (MEM_P (operands[2]))
    operands[2] = force_reg (<MODE>mode, operands[2]);
  if (MEM_P (operands[3]))
    operands[3] = force_reg (<MODE>mode, operands[3]);
})

;; Don't do conditional moves with memory inputs
(define_peephole2
  [(match_scratch:MODEF 4 "r")
   (set (match_operand:MODEF 0 "general_reg_operand")
	(if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
			      [(reg FLAGS_REG) (const_int 0)])
	  (match_operand:MODEF 2 "nonimmediate_operand")
	  (match_operand:MODEF 3 "nonimmediate_operand")))]
  "(<MODE>mode != DFmode || TARGET_64BIT)
   && TARGET_80387 && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
	(if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.

;; XOP conditional move
(define_insn "*xop_pcmov_<mode>"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
	(if_then_else:MODEF
	  (match_operand:MODEF 1 "register_operand" "x")
	  (match_operand:MODEF 2 "register_operand" "x")
	  (match_operand:MODEF 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
  [(set_attr "type" "sse4arg")])

;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.

(define_insn "<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
	(smaxmin:MODEF
	  (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
	  (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

(define_insn "*ieee_s<ieee_maxmin><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
	(unspec:MODEF
	  [(match_operand:MODEF 1 "register_operand" "0,v")
	   (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
	  IEEE_MAXMIN))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "@
   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
   v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])

;; Make two stack loads independent:
;;   fld aa              fld aa
;;   fld %st(0)     ->   fld bb
;;   fmul bb             fmul %st(1), %st
;;
;; Actually we only match the last two instructions for simplicity.

(define_peephole2
  [(set (match_operand 0 "fp_register_operand")
	(match_operand 1 "fp_register_operand"))
   (set (match_dup 0)
	(match_operator 2 "binary_fp_operator"
	   [(match_dup 0)
	    (match_operand 3 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
	(match_op_dup 2
	  [(match_dup 5) (match_dup 4)]))]
{
  operands[4] = operands[0];
  operands[5] = operands[1];

  /* The % modifier is not operational anymore in peephole2's, so we have to
     swap the operands manually in the case of addition and multiplication. */
  if (COMMUTATIVE_ARITH_P (operands[2]))
    std::swap (operands[4], operands[5]);
})

(define_peephole2
  [(set (match_operand 0 "fp_register_operand")
	(match_operand 1 "fp_register_operand"))
   (set (match_dup 0)
	(match_operator 2 "binary_fp_operator"
	   [(match_operand 3 "memory_operand")
	    (match_dup 0)]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
	(match_op_dup 2
	  [(match_dup 4) (match_dup 5)]))]
{
  operands[4] = operands[0];
  operands[5] = operands[1];

  /* The % modifier is not operational anymore in peephole2's, so we have to
     swap the operands manually in the case of addition and multiplication. */
  if (COMMUTATIVE_ARITH_P (operands[2]))
    std::swap (operands[4], operands[5]);
})

;; Conditional addition patterns
(define_expand "add<mode>cc"
  [(match_operand:SWI 0 "register_operand")
   (match_operand 1 "ordered_comparison_operator")
   (match_operand:SWI 2 "register_operand")
   (match_operand:SWI 3 "const_int_operand")]
  ""
  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")

;; min/max patterns

(define_mode_iterator MAXMIN_IMODE
  [(SI "TARGET_SSE4_1") (DI "TARGET_AVX512VL")])
(define_code_attr maxmin_rel
  [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])

(define_expand "<code><mode>3"
  [(parallel
    [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
	  (maxmin:MAXMIN_IMODE
	    (match_operand:MAXMIN_IMODE 1 "register_operand")
	    (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_STV")

(define_insn_and_split "*<code><mode>3_1"
  [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
	(maxmin:MAXMIN_IMODE
	  (match_operand:MAXMIN_IMODE 1 "register_operand")
	  (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_64BIT || <MODE>mode != DImode) && TARGET_STV
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(if_then_else:MAXMIN_IMODE (match_dup 3)
	  (match_dup 1)
	  (match_dup 2)))]
{
  machine_mode mode = <MODE>mode;

  if (!register_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);

  enum rtx_code code = <maxmin_rel>;
  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], operands[2]);
  rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], operands[2]);
  emit_insn (gen_rtx_SET (flags, tmp));

  operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
})

(define_insn_and_split "*<code>di3_doubleword"
  [(set (match_operand:DI 0 "register_operand")
	(maxmin:DI (match_operand:DI 1 "register_operand")
		   (match_operand:DI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(if_then_else:SI (match_dup 6)
	  (match_dup 1)
	  (match_dup 2)))
   (set (match_dup 3)
	(if_then_else:SI (match_dup 6)
	  (match_dup 4)
	  (match_dup 5)))]
{
  if (!register_operand (operands[2], DImode))
    operands[2] = force_reg (DImode, operands[2]);

  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);

  rtx cmplo[2] = { operands[1], operands[2] };
  rtx cmphi[2] = { operands[4], operands[5] };

  enum rtx_code code = <maxmin_rel>;

  switch (code)
    {
    case LE: case LEU:
      std::swap (cmplo[0], cmplo[1]);
      std::swap (cmphi[0], cmphi[1]);
      code = swap_condition (code);
      /* FALLTHRU */

    case GE: case GEU:
      {
	bool uns = (code == GEU);
	rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
	  = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;

	emit_insn (gen_cmp_1 (SImode, cmplo[0], cmplo[1]));

	rtx tmp = gen_rtx_SCRATCH (SImode);
	emit_insn (sbb_insn (SImode, tmp, cmphi[0], cmphi[1]));

	rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
	operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);

	break;
      }

    default:
      gcc_unreachable ();
    }
})

;; Misc patterns (?)

;; This pattern exists to put a dependency on all ebp-based memory accesses.
;; Otherwise there will be nothing to keep
;;
;; [(set (reg ebp) (reg esp))]
;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
;;  (clobber (eflags)]
;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
;;
;; in proper program order.

(define_insn "@pro_epilogue_adjust_stack_add_<mode>"
  [(set (match_operand:P 0 "register_operand" "=r,r")
	(plus:P (match_operand:P 1 "register_operand" "0,r")
	        (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOV:
      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";

    case TYPE_ALU:
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
	return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";

    default:
      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
      return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
    }
}
  [(set (attr "type")
	(cond [(and (eq_attr "alternative" "0")
		    (not (match_test "TARGET_OPT_AGU")))
		 (const_string "alu")
	       (match_operand:<MODE> 2 "const0_operand")
		 (const_string "imov")
	      ]
	      (const_string "lea")))
   (set (attr "length_immediate")
	(cond [(eq_attr "type" "imov")
		 (const_string "0")
	       (and (eq_attr "type" "alu")
		    (match_operand 2 "const128_operand"))
		 (const_string "1")
	      ]
	      (const_string "*")))
   (set_attr "mode" "<MODE>")])

(define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
  [(set (match_operand:P 0 "register_operand" "=r")
	(minus:P (match_operand:P 1 "register_operand" "0")
		 (match_operand:P 2 "register_operand" "r")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "<MODE>")])

(define_insn "@allocate_stack_worker_probe_<mode>"
  [(set (match_operand:P 0 "register_operand" "=a")
	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
			    UNSPECV_STACK_PROBE))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_target_stack_probe ()"
  "call\t___chkstk_ms"
  [(set_attr "type" "multi")
   (set_attr "length" "5")])

(define_expand "allocate_stack"
  [(match_operand 0 "register_operand")
   (match_operand 1 "general_operand")]
  "ix86_target_stack_probe ()"
{
  rtx x;

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT 0
#endif

  if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
      && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
    x = operands[1];
  else
    {
      x = copy_to_mode_reg (Pmode, operands[1]);

      emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
    }

  x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
			   stack_pointer_rtx, 0, OPTAB_DIRECT);

  if (x != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, x);

  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
  DONE;
})

(define_expand "probe_stack"
  [(match_operand 0 "memory_operand")]
  ""
{
  emit_insn (gen_probe_stack_1
	     (word_mode, operands[0], const0_rtx));
  DONE;
})

;; Use OR for stack probes, this is shorter.
(define_insn "@probe_stack_1_<mode>"
  [(set (match_operand:W 0 "memory_operand" "=m")
	(unspec:W [(match_operand:W 1 "const0_operand")]
		  UNSPEC_PROBE_STACK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])
  
(define_insn "@adjust_stack_and_probe_<mode>"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
			    UNSPECV_PROBE_STACK_RANGE))
   (set (reg:P SP_REG)
        (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
  "* return output_adjust_stack_and_probe (operands[0]);"
  [(set_attr "type" "multi")])

(define_insn "@probe_stack_range_<mode>"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
			    (match_operand:P 2 "const_int_operand" "n")]
			    UNSPECV_PROBE_STACK_RANGE))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "* return output_probe_stack_range (operands[0], operands[2]);"
  [(set_attr "type" "multi")])

(define_expand "builtin_setjmp_receiver"
  [(label_ref (match_operand 0))]
  "!TARGET_64BIT && flag_pic"
{
#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      rtx xops[3];
      rtx_code_label *label_rtx = gen_label_rtx ();
      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
      xops[0] = xops[1] = pic_offset_table_rtx;
      xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
      ix86_expand_binary_operator (MINUS, SImode, xops);
    }
  else
#endif
    emit_insn (gen_set_got (pic_offset_table_rtx));
  DONE;
})

(define_expand "save_stack_nonlocal"
  [(set (match_operand 0 "memory_operand")
        (match_operand 1 "register_operand"))]
  ""
{
  rtx stack_slot;
  if ((flag_cf_protection & CF_RETURN))
    {
      /* Copy shadow stack pointer to the first slot and stack ppointer
	 to the second slot.  */
      rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
      stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
      rtx ssp = gen_reg_rtx (word_mode);
      emit_insn ((word_mode == SImode)
		 ? gen_rdsspsi (ssp)
		 : gen_rdsspdi (ssp));
      emit_move_insn (ssp_slot, ssp);
    }
  else
    stack_slot = adjust_address (operands[0], Pmode, 0);
  emit_move_insn (stack_slot, operands[1]);
  DONE;
})

(define_expand "restore_stack_nonlocal"
  [(set (match_operand 0 "register_operand" "")
	(match_operand 1 "memory_operand" ""))]
  ""
{
  rtx stack_slot;
  if ((flag_cf_protection & CF_RETURN))
    {
      /* Restore shadow stack pointer from the first slot and stack
	 pointer from the second slot.  */
      rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
      stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);

      rtx flags, jump, noadj_label, inc_label, loop_label;
      rtx reg_adj, reg_ssp, tmp, clob;

      /* Get the current shadow stack pointer.  The code below will check if
	 SHSTK feature is enabled.  If it is not enabled the RDSSP instruction
	 is a NOP.  */
      reg_ssp = gen_reg_rtx (word_mode);
      emit_insn (gen_rtx_SET (reg_ssp, const0_rtx));
      emit_insn ((word_mode == SImode)
		 ? gen_rdsspsi (reg_ssp)
		 : gen_rdsspdi (reg_ssp));

      /* Compare through substraction the saved and the current ssp to decide
	 if ssp has to be adjusted.  */
      tmp = gen_rtx_SET (reg_ssp, gen_rtx_MINUS (word_mode, reg_ssp,
						 ssp_slot));
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);

      /* Compare and jump over adjustment code.  */
      noadj_label = gen_label_rtx ();
      flags = gen_rtx_REG (CCZmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, noadj_label),
				  pc_rtx);
      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      JUMP_LABEL (jump) = noadj_label;

      /* Compute the numebr of frames to adjust.  */
      reg_adj = gen_lowpart (ptr_mode, reg_ssp);
      tmp = gen_rtx_SET (reg_adj,
			 gen_rtx_LSHIFTRT (ptr_mode,
					   negate_rtx (ptr_mode, reg_adj),
					   GEN_INT ((word_mode == SImode)
						    ? 2
						    : 3)));
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);

      /* Check if number of frames <= 255 so no loop is needed.  */
      tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255));
      flags = gen_rtx_REG (CCmode, FLAGS_REG);
      emit_insn (gen_rtx_SET (flags, tmp));

      inc_label = gen_label_rtx ();
      tmp = gen_rtx_LEU (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, inc_label),
				  pc_rtx);
      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      JUMP_LABEL (jump) = inc_label;

      rtx reg_255 = gen_reg_rtx (word_mode);
      emit_move_insn (reg_255, GEN_INT (255));

      /* Adjust the ssp in a loop.  */
      loop_label = gen_label_rtx ();
      emit_label (loop_label);
      LABEL_NUSES (loop_label) = 1;

      emit_insn ((word_mode == SImode)
		 ? gen_incsspsi (reg_255)
		 : gen_incsspdi (reg_255));
      tmp = gen_rtx_SET (reg_adj, gen_rtx_MINUS (ptr_mode,
						 reg_adj,
						 GEN_INT (255)));
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);

      tmp = gen_rtx_COMPARE (CCmode, reg_adj, GEN_INT (255));
      flags = gen_rtx_REG (CCmode, FLAGS_REG);
      emit_insn (gen_rtx_SET (flags, tmp));

      /* Jump to the loop label.  */
      tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, loop_label),
				  pc_rtx);
      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      JUMP_LABEL (jump) = loop_label;

      emit_label (inc_label);
      LABEL_NUSES (inc_label) = 1;
      emit_insn ((word_mode == SImode)
		 ? gen_incsspsi (reg_ssp)
		 : gen_incsspdi (reg_ssp));

      emit_label (noadj_label);
      LABEL_NUSES (noadj_label) = 1;
    }
  else
    stack_slot = adjust_address (operands[1], Pmode, 0);
  emit_move_insn (operands[0], stack_slot);
  DONE;
})


;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
;; Do not split instructions with mask registers.
(define_split
  [(set (match_operand 0 "general_reg_operand")
	(match_operator 3 "promotable_binary_operator"
	   [(match_operand 1 "general_reg_operand")
	    (match_operand 2 "aligned_operand")]))
   (clobber (reg:CC FLAGS_REG))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && ((GET_MODE (operands[0]) == HImode
	&& ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
            /* ??? next two lines just !satisfies_constraint_K (...) */
	    || !CONST_INT_P (operands[2])
	    || satisfies_constraint_K (operands[2])))
       || (GET_MODE (operands[0]) == QImode
	   && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
  [(parallel [(set (match_dup 0)
		   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  if (GET_CODE (operands[3]) != ASHIFT)
    operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = shallow_copy_rtx (operands[3]);
  PUT_MODE (operands[3], SImode);
})

; Promote the QImode tests, as i386 has encoding of the AND
; instruction with 32-bit sign-extended immediate and thus the
; instruction size is unchanged, except in the %eax case for
; which it is increased by one byte, hence the ! optimize_size.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 2 "compare_operator"
	  [(and (match_operand 3 "aligned_operand")
		(match_operand 4 "const_int_operand"))
	   (const_int 0)]))
   (set (match_operand 1 "register_operand")
	(and (match_dup 3) (match_dup 4)))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && optimize_insn_for_speed_p ()
   && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
   /* Ensure that the operand will remain sign-extended immediate.  */
   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
  [(parallel [(set (match_dup 0)
		   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
			            (const_int 0)]))
	      (set (match_dup 1)
		   (and:SI (match_dup 3) (match_dup 4)))])]
{
  operands[4]
    = gen_int_mode (INTVAL (operands[4])
		    & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
})

; Don't promote the QImode tests, as i386 doesn't have encoding of
; the TEST instruction with 32-bit sign-extended immediate and thus
; the instruction size would at least double, which is not what we
; want even with ! optimize_size.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and (match_operand:HI 2 "aligned_operand")
		(match_operand:HI 3 "const_int_operand"))
	   (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && ! TARGET_FAST_PREFIX
   && optimize_insn_for_speed_p ()
   /* Ensure that the operand will remain sign-extended immediate.  */
   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
  [(set (match_dup 0)
	(match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
		         (const_int 0)]))]
{
  operands[3]
    = gen_int_mode (INTVAL (operands[3])
		    & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
  operands[2] = gen_lowpart (SImode, operands[2]);
})

(define_split
  [(set (match_operand 0 "register_operand")
	(neg (match_operand 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && (GET_MODE (operands[0]) == HImode
       || (GET_MODE (operands[0]) == QImode
	   && (TARGET_PROMOTE_QImode
	       || optimize_insn_for_size_p ())))"
  [(parallel [(set (match_dup 0)
		   (neg:SI (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
})

;; Do not split instructions with mask regs.
(define_split
  [(set (match_operand 0 "general_reg_operand")
	(not (match_operand 1 "general_reg_operand")))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && (GET_MODE (operands[0]) == HImode
       || (GET_MODE (operands[0]) == QImode
	   && (TARGET_PROMOTE_QImode
	       || optimize_insn_for_size_p ())))"
  [(set (match_dup 0)
	(not:SI (match_dup 1)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
})

;; RTL Peephole optimizations, run before sched2.  These primarily look to
;; transform a complex memory operation into two memory to register operations.

;; Don't push memory operands
(define_peephole2
  [(set (match_operand:SWI 0 "push_operand")
	(match_operand:SWI 1 "memory_operand"))
   (match_scratch:SWI 2 "<r>")]
  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; We need to handle SFmode only, because DFmode and XFmode are split to
;; SImode pushes.
(define_peephole2
  [(set (match_operand:SF 0 "push_operand")
	(match_operand:SF 1 "memory_operand"))
   (match_scratch:SF 2 "r")]
  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; Don't move an immediate directly to memory when the instruction
;; gets too big, or if LCP stalls are a problem for 16-bit moves.
(define_peephole2
  [(match_scratch:SWI124 1 "<r>")
   (set (match_operand:SWI124 0 "memory_operand")
        (const_int 0))]
  "optimize_insn_for_speed_p ()
   && ((<MODE>mode == HImode
       && TARGET_LCP_STALL)
       || (!TARGET_USE_MOV0
          && TARGET_SPLIT_LONG_MOVES
          && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 2) (const_int 0))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 1))]
  "operands[2] = gen_lowpart (SImode, operands[1]);")

(define_peephole2
  [(match_scratch:SWI124 2 "<r>")
   (set (match_operand:SWI124 0 "memory_operand")
        (match_operand:SWI124 1 "immediate_operand"))]
  "optimize_insn_for_speed_p ()
   && ((<MODE>mode == HImode
       && TARGET_LCP_STALL)
       || (TARGET_SPLIT_LONG_MOVES
          && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; Don't compare memory with zero, load and use a test instead.
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
 	(match_operator 1 "compare_operator"
	  [(match_operand:SI 2 "memory_operand")
	   (const_int 0)]))
   (match_scratch:SI 3 "r")]
  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
  [(set (match_dup 3) (match_dup 2))
   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])

;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
;; Don't split NOTs with a displacement operand, because resulting XOR
;; will not be pairable anyway.
;;
;; On AMD K6, NOT is vector decoded with memory operand that cannot be
;; represented using a modRM byte.  The XOR replacement is long decoded,
;; so this split helps here as well.
;;
;; Note: Can't do this as a regular split because we can't get proper
;; lifetime information then.

(define_peephole2
  [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
	(not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
  "optimize_insn_for_speed_p ()
   && ((TARGET_NOT_UNPAIRABLE
	&& (!MEM_P (operands[0])
	    || !memory_displacement_operand (operands[0], <MODE>mode)))
       || (TARGET_NOT_VECTORMODE
	   && long_memory_operand (operands[0], <MODE>mode)))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
		   (xor:SWI124 (match_dup 1) (const_int -1)))
	      (clobber (reg:CC FLAGS_REG))])])

;; Non pairable "test imm, reg" instructions can be translated to
;; "and imm, reg" if reg dies.  The "and" form is also shorter (one
;; byte opcode instead of two, have a short form for byte operands),
;; so do it for other CPUs as well.  Given that the value was dead,
;; this should not create any new dependencies.  Pass on the sub-word
;; versions if we're concerned about partial register stalls.

(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and:SI (match_operand:SI 2 "register_operand")
		   (match_operand:SI 3 "immediate_operand"))
	   (const_int 0)]))]
  "ix86_match_ccmode (insn, CCNOmode)
   && (REGNO (operands[2]) != AX_REG
       || satisfies_constraint_K (operands[3]))
   && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
	   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
		            (const_int 0)]))
      (set (match_dup 2)
	   (and:SI (match_dup 2) (match_dup 3)))])])

;; We don't need to handle HImode case, because it will be promoted to SImode
;; on ! TARGET_PARTIAL_REG_STALL

(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and:QI (match_operand:QI 2 "register_operand")
		   (match_operand:QI 3 "immediate_operand"))
	   (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL
   && ix86_match_ccmode (insn, CCNOmode)
   && REGNO (operands[2]) != AX_REG
   && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
	   (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
		            (const_int 0)]))
      (set (match_dup 2)
	   (and:QI (match_dup 2) (match_dup 3)))])])

(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(and:QI
	     (subreg:QI
	       (zero_extract:SI (match_operand 2 "QIreg_operand")
				(const_int 8)
				(const_int 8)) 0)
	     (match_operand 3 "const_int_operand"))
	   (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL
   && ix86_match_ccmode (insn, CCNOmode)
   && REGNO (operands[2]) != AX_REG
   && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
	   (match_op_dup 1
	     [(and:QI
		(subreg:QI
		  (zero_extract:SI (match_dup 2)
				   (const_int 8)
				   (const_int 8)) 0)
		(match_dup 3))
	      (const_int 0)]))
      (set (zero_extract:SI (match_dup 2)
			    (const_int 8)
			    (const_int 8))
	   (subreg:SI
	     (and:QI
	       (subreg:QI
		 (zero_extract:SI (match_dup 2)
				  (const_int 8)
				  (const_int 8)) 0)
	       (match_dup 3)) 0))])])

;; Don't do logical operations with memory inputs.
(define_peephole2
  [(match_scratch:SWI 2 "<r>")
   (parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operator:SWI 3 "arith_or_logical_operator"
		     [(match_dup 0)
		      (match_operand:SWI 1 "memory_operand")]))
	      (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0)
		   (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(match_scratch:SWI 2 "<r>")
   (parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operator:SWI 3 "arith_or_logical_operator"
		     [(match_operand:SWI 1 "memory_operand")
		      (match_dup 0)]))
	      (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0)
		   (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
	      (clobber (reg:CC FLAGS_REG))])])

;; Prefer Load+RegOp to Mov+MemOp.  Watch out for cases when
;; the memory address refers to the destination of the load!

(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
	(match_operand:SWI 1 "general_reg_operand"))
   (parallel [(set (match_dup 0)
		   (match_operator:SWI 3 "commutative_operator"
		     [(match_dup 0)
		      (match_operand:SWI 2 "memory_operand")]))
	      (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[0]) != REGNO (operands[1])
   && (<MODE>mode != QImode
       || any_QIreg_operand (operands[1], QImode))"
  [(set (match_dup 0) (match_dup 4))
   (parallel [(set (match_dup 0)
		   (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = replace_rtx (operands[2], operands[0], operands[1], true);")

(define_peephole2
  [(set (match_operand 0 "mmx_reg_operand")
	(match_operand 1 "mmx_reg_operand"))
   (set (match_dup 0)
	(match_operator 3 "commutative_operator"
	  [(match_dup 0)
	   (match_operand 2 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 0)
	(match_op_dup 3 [(match_dup 0) (match_dup 1)]))])

(define_peephole2
  [(set (match_operand 0 "sse_reg_operand")
	(match_operand 1 "sse_reg_operand"))
   (set (match_dup 0)
	(match_operator 3 "commutative_operator"
	  [(match_dup 0)
	   (match_operand 2 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 0)
	(match_op_dup 3 [(match_dup 0) (match_dup 1)]))])

; Don't do logical operations with memory outputs
;
; These two don't make sense for PPro/PII -- we're expanding a 4-uop
; instruction into two 1-uop insns plus a 2-uop insn.  That last has
; the same decoder scheduling characteristics as the original.

(define_peephole2
  [(match_scratch:SWI 2 "<r>")
   (parallel [(set (match_operand:SWI 0 "memory_operand")
		   (match_operator:SWI 3 "arith_or_logical_operator"
		     [(match_dup 0)
		      (match_operand:SWI 1 "<nonmemory_operand>")]))
	      (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 0))
   (parallel [(set (match_dup 2)
		   (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 2))])

(define_peephole2
  [(match_scratch:SWI 2 "<r>")
   (parallel [(set (match_operand:SWI 0 "memory_operand")
		   (match_operator:SWI 3 "arith_or_logical_operator"
		     [(match_operand:SWI 1 "<nonmemory_operand>")
		      (match_dup 0)]))
	      (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 0))
   (parallel [(set (match_dup 2)
		   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 2))])

;; Attempt to use arith or logical operations with memory outputs with
;; setting of flags.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (parallel [(set (match_dup 0)
		   (match_operator:SWI 3 "plusminuslogic_operator"
		     [(match_dup 0)
		      (match_operand:SWI 2 "<nonmemory_operand>")]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 0))
   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (4, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))
   && ix86_match_ccmode (peep2_next_insn (3),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 4) (match_dup 6))
	      (set (match_dup 1) (match_dup 5))])]
{
  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
  operands[5]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
		      copy_rtx (operands[1]),
		      operands[2]);
  operands[6]
    = gen_rtx_COMPARE (GET_MODE (operands[4]),
		       copy_rtx (operands[5]),
		       const0_rtx);
})

;; Likewise for cmpelim optimized pattern.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (parallel [(set (reg FLAGS_REG)
		   (compare (match_operator:SWI 3 "plusminuslogic_operator"
			      [(match_dup 0)
			       (match_operand:SWI 2 "<nonmemory_operand>")])
			    (const_int 0)))
	      (set (match_dup 0) (match_dup 3))])
   (set (match_dup 1) (match_dup 0))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && ix86_match_ccmode (peep2_next_insn (1),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 4) (match_dup 6))
	      (set (match_dup 1) (match_dup 5))])]
{
  operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
  operands[5]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
		      copy_rtx (operands[1]), operands[2]);
  operands[6]
    = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
		       const0_rtx);
})

;; Likewise for instances where we have a lea pattern.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (set (match_operand:<LEAMODE> 3 "register_operand")
	(plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
			(match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
   (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[4]) == REGNO (operands[0])
   && REGNO (operands[5]) == REGNO (operands[3])
   && peep2_reg_dead_p (4, operands[3])
   && ((REGNO (operands[0]) == REGNO (operands[3]))
       || peep2_reg_dead_p (2, operands[0]))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))
   && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
  [(parallel [(set (match_dup 6) (match_dup 8))
	      (set (match_dup 1) (match_dup 7))])]
{
  operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
  operands[7]
    = gen_rtx_PLUS (<MODE>mode,
		    copy_rtx (operands[1]),
		    gen_lowpart (<MODE>mode, operands[2]));
  operands[8]
    = gen_rtx_COMPARE (GET_MODE (operands[6]),
		       copy_rtx (operands[7]),
		       const0_rtx);
})

(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operator:SWI 2 "plusminuslogic_operator"
		     [(match_dup 0)
		      (match_operand:SWI 1 "memory_operand")]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 0))
   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && COMMUTATIVE_ARITH_P (operands[2])
   && peep2_reg_dead_p (3, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && ix86_match_ccmode (peep2_next_insn (2),
			 GET_CODE (operands[2]) == PLUS
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 3) (match_dup 5))
	      (set (match_dup 1) (match_dup 4))])]
{
  operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
  operands[4]
    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
		      copy_rtx (operands[1]),
		      operands[0]);
  operands[5]
    = gen_rtx_COMPARE (GET_MODE (operands[3]),
		       copy_rtx (operands[4]),
		       const0_rtx);
})

;; Likewise for cmpelim optimized pattern.
(define_peephole2
  [(parallel [(set (reg FLAGS_REG)
		   (compare (match_operator:SWI 2 "plusminuslogic_operator"
			      [(match_operand:SWI 0 "register_operand")
			       (match_operand:SWI 1 "memory_operand")])
			    (const_int 0)))
	      (set (match_dup 0) (match_dup 2))])
   (set (match_dup 1) (match_dup 0))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && COMMUTATIVE_ARITH_P (operands[2])
   && peep2_reg_dead_p (2, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && ix86_match_ccmode (peep2_next_insn (0),
			 GET_CODE (operands[2]) == PLUS
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 3) (match_dup 5))
	      (set (match_dup 1) (match_dup 4))])]
{
  operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
  operands[4]
    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
		      copy_rtx (operands[1]), operands[0]);
  operands[5]
    = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
		       const0_rtx);
})

(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
	(match_operand:SWI12 1 "memory_operand"))
   (parallel [(set (match_operand:SI 4 "register_operand")
		   (match_operator:SI 3 "plusminuslogic_operator"
		     [(match_dup 4)
		      (match_operand:SI 2 "nonmemory_operand")]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 0))
   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[0]) == REGNO (operands[4])
   && peep2_reg_dead_p (4, operands[0])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && ix86_match_ccmode (peep2_next_insn (3),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
	      (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
		      copy_rtx (operands[1]),
		      gen_lowpart (<MODE>mode, operands[2]));
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]),
		       copy_rtx (operands[6]),
		       const0_rtx);
})

;; peephole2 comes before regcprop, so deal also with a case that
;; would be cleaned up by regcprop.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (parallel [(set (match_dup 0)
		   (match_operator:SWI 3 "plusminuslogic_operator"
		     [(match_dup 0)
		      (match_operand:SWI 2 "<nonmemory_operand>")]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 4))
   (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (5, operands[4])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[4], operands[1])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))
   && ix86_match_ccmode (peep2_next_insn (4),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
	      (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
		      copy_rtx (operands[1]),
		      operands[2]);
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]),
		       copy_rtx (operands[6]),
		       const0_rtx);
})

(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
	(match_operand:SWI12 1 "memory_operand"))
   (parallel [(set (match_operand:SI 4 "register_operand")
		   (match_operator:SI 3 "plusminuslogic_operator"
		     [(match_dup 4)
		      (match_operand:SI 2 "nonmemory_operand")]))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 5))
   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[0]) == REGNO (operands[4])
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (5, operands[5])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[5], operands[1])
   && ix86_match_ccmode (peep2_next_insn (4),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 6) (match_dup 8))
	      (set (match_dup 1) (match_dup 7))])]
{
  operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[7]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
		      copy_rtx (operands[1]),
		      gen_lowpart (<MODE>mode, operands[2]));
  operands[8]
    = gen_rtx_COMPARE (GET_MODE (operands[6]),
		       copy_rtx (operands[7]),
		       const0_rtx);
})

;; Likewise for cmpelim optimized pattern.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (parallel [(set (reg FLAGS_REG)
		   (compare (match_operator:SWI 3 "plusminuslogic_operator"
			      [(match_dup 0)
			       (match_operand:SWI 2 "<nonmemory_operand>")])
			    (const_int 0)))
	      (set (match_dup 0) (match_dup 3))])
   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 4))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (4, operands[4])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[4], operands[1])
   && ix86_match_ccmode (peep2_next_insn (1),
			 (GET_CODE (operands[3]) == PLUS
			  || GET_CODE (operands[3]) == MINUS)
			 ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
	      (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
		      copy_rtx (operands[1]), operands[2]);
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
		       const0_rtx);
})

;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
;; into x = z; x ^= y; x != z
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "memory_operand"))
   (set (match_operand:SWI 3 "register_operand") (match_dup 0))
   (parallel [(set (match_operand:SWI 4 "register_operand")
		   (xor:SWI (match_dup 4)
			    (match_operand:SWI 2 "<nonmemory_operand>")))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 4))
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_operand:SWI 5 "register_operand")
		     (match_operand:SWI 6 "<nonmemory_operand>")))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && (REGNO (operands[4]) == REGNO (operands[0])
       || REGNO (operands[4]) == REGNO (operands[3]))
   && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
			     ? 3 : 0], operands[5])
       ? rtx_equal_p (operands[2], operands[6])
       : rtx_equal_p (operands[2], operands[5])
	 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
				  ? 3 : 0], operands[6]))
   && peep2_reg_dead_p (4, operands[4])
   && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
				    ? 3 : 0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[3], operands[0])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))"
  [(parallel [(set (match_dup 7) (match_dup 9))
	      (set (match_dup 1) (match_dup 8))])]
{
  operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
			     operands[2]);
  operands[9]
    = gen_rtx_COMPARE (GET_MODE (operands[7]),
		       copy_rtx (operands[8]),
		       const0_rtx);
})

(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
	(match_operand:SWI12 1 "memory_operand"))
   (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
   (parallel [(set (match_operand:SI 4 "register_operand")
		   (xor:SI (match_dup 4)
			   (match_operand:SI 2 "<nonmemory_operand>")))
	      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_operand:SWI12 6 "register_operand")
		     (match_operand:SWI12 7 "<nonmemory_operand>")))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && (REGNO (operands[5]) == REGNO (operands[0])
       || REGNO (operands[5]) == REGNO (operands[3]))
   && REGNO (operands[5]) == REGNO (operands[4])
   && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
			     ? 3 : 0], operands[6])
       ? (REG_P (operands[2])
	  ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
	  : rtx_equal_p (operands[2], operands[7]))
       : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
				? 3 : 0], operands[7])
	  && REG_P (operands[2])
	  && REGNO (operands[2]) == REGNO (operands[6])))
   && peep2_reg_dead_p (4, operands[5])
   && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
				    ? 3 : 0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[3], operands[0])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))"
  [(parallel [(set (match_dup 8) (match_dup 10))
	      (set (match_dup 1) (match_dup 9))])]
{
  operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
			     gen_lowpart (<MODE>mode, operands[2]));
  operands[10]
    = gen_rtx_COMPARE (GET_MODE (operands[8]),
		       copy_rtx (operands[9]),
		       const0_rtx);
})

;; Attempt to optimize away memory stores of values the memory already
;; has.  See PR79593.
(define_peephole2
  [(set (match_operand 0 "register_operand")
        (match_operand 1 "memory_operand"))
   (set (match_operand 2 "memory_operand") (match_dup 0))]
  "!MEM_VOLATILE_P (operands[1])
   && !MEM_VOLATILE_P (operands[2])
   && rtx_equal_p (operands[1], operands[2])
   && !reg_overlap_mentioned_p (operands[0], operands[2])"
  [(set (match_dup 0) (match_dup 1))])

;; Attempt to always use XOR for zeroing registers (including FP modes).
(define_peephole2
  [(set (match_operand 0 "general_reg_operand")
	(match_operand 1 "const0_operand"))]
  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (const_int 0))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[0] = gen_lowpart (word_mode, operands[0]);")

(define_peephole2
  [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
	(const_int 0))]
  "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
	      (clobber (reg:CC FLAGS_REG))])])

;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
(define_peephole2
  [(set (match_operand:SWI248 0 "general_reg_operand")
	(const_int -1))]
  "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (const_int -1))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
    operands[0] = gen_lowpart (SImode, operands[0]);
})

;; Attempt to convert simple lea to add/shift.
;; These can be created by move expanders.
;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
;; relevant lea instructions were already split.

(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
  	(plus:SWI48 (match_dup 0)
		    (match_operand:SWI48 1 "<nonmemory_operand>")))]
  "!TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
  	(plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
		    (match_dup 0)))]
  "!TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
  	(zero_extend:DI
	  (plus:SI (match_operand:SI 1 "register_operand")
		   (match_operand:SI 2 "nonmemory_operand"))))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
		   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
  	(zero_extend:DI
	  (plus:SI (match_operand:SI 1 "nonmemory_operand")
		   (match_operand:SI 2 "register_operand"))))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[2])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
		   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
  	(mult:SWI48 (match_dup 0)
		    (match_operand:SWI48 1 "const_int_operand")))]
  "pow2p_hwi (INTVAL (operands[1]))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
  	(zero_extend:DI
	  (mult:SI (match_operand:SI 1 "register_operand")
		   (match_operand:SI 2 "const_int_operand"))))]
  "TARGET_64BIT
   && pow2p_hwi (INTVAL (operands[2]))
   && REGNO (operands[0]) == REGNO (operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
		   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
	      (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")

;; The ESP adjustments can be done by the push and pop instructions.  Resulting
;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
;; On many CPUs it is also faster, since special hardware to avoid esp
;; dependencies is present.

;; While some of these conversions may be done using splitters, we use
;; peepholes in order to allow combine_stack_adjustments pass to see
;; nonobfuscated RTL.

;; Convert prologue esp subtractions to push.
;; We need register to push.  In order to keep verify_flow_info happy we have
;; two choices
;; - use scratch and clobber it in order to avoid dependencies
;; - use already live register
;; We can't use the second way right now, since there is no reliable way how to
;; verify that given register is live.  First choice will also most likely in
;; fewer dependencies.  On the place of esp adjustments it is very likely that
;; call clobbered registers are dead.  We may want to use base pointer as an
;; alternative when no register is available later.

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))
	      (clobber (mem:BLK (scratch)))])]
  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
   && ix86_red_zone_size == 0"
  [(clobber (match_dup 1))
   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
	      (clobber (mem:BLK (scratch)))])])

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))
	      (clobber (mem:BLK (scratch)))])]
  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
   && ix86_red_zone_size == 0"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
	      (clobber (mem:BLK (scratch)))])])

;; Convert esp subtractions to push.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
   && ix86_red_zone_size == 0"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
   && ix86_red_zone_size == 0"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])

;; Convert epilogue deallocator to pop.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))
	      (clobber (mem:BLK (scratch)))])]
  "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
	      (clobber (mem:BLK (scratch)))])])

;; Two pops case is tricky, since pop causes dependency
;; on destination register.  We use two registers if available.
(define_peephole2
  [(match_scratch:W 1 "r")
   (match_scratch:W 2 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))
	      (clobber (mem:BLK (scratch)))])]
  "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
	      (clobber (mem:BLK (scratch)))])
   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))
	      (clobber (mem:BLK (scratch)))])]
  "optimize_insn_for_size_p ()
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
	      (clobber (mem:BLK (scratch)))])
   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Convert esp additions to pop.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Two pops case is tricky, since pop causes dependency
;; on destination register.  We use two registers if available.
(define_peephole2
  [(match_scratch:W 1 "r")
   (match_scratch:W 2 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
		   (plus:P (reg:P SP_REG)
			   (match_operand:P 0 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "optimize_insn_for_size_p ()
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Convert compares with 1 to shorter inc/dec operations when CF is not
;; required and register dies.  Similarly for 128 to -128.
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
	(match_operator 1 "compare_operator"
	  [(match_operand 2 "register_operand")
	   (match_operand 3 "const_int_operand")]))]
  "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
     && incdec_operand (operands[3], GET_MODE (operands[3])))
    || (!TARGET_FUSE_CMP_AND_BRANCH
	&& INTVAL (operands[3]) == 128))
   && ix86_match_ccmode (insn, CCGCmode)
   && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
		   (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
	      (clobber (match_dup 2))])])

;; Convert imul by three, five and nine into lea
(define_peephole2
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
	  (mult:SWI48 (match_operand:SWI48 1 "register_operand")
		      (match_operand:SWI48 2 "const359_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_PARTIAL_REG_STALL
   || <MODE>mode == SImode
   || optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
	(plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
		    (match_dup 1)))]
  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")

(define_peephole2
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
	  (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
		      (match_operand:SWI48 2 "const359_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "optimize_insn_for_speed_p ()
   && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 0)
	(plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
		    (match_dup 0)))]
  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")

;; imul $32bit_imm, mem, reg is vector decoded, while
;; imul $32bit_imm, reg, reg is direct decoded.
(define_peephole2
  [(match_scratch:SWI48 3 "r")
   (parallel [(set (match_operand:SWI48 0 "register_operand")
		   (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
			       (match_operand:SWI48 2 "immediate_operand")))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
   && !satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(match_scratch:SI 3 "r")
   (parallel [(set (match_operand:DI 0 "register_operand")
		   (zero_extend:DI
		     (mult:SI (match_operand:SI 1 "memory_operand")
			      (match_operand:SI 2 "immediate_operand"))))
	      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT
   && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
   && !satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0)
		   (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
	      (clobber (reg:CC FLAGS_REG))])])

;; imul $8/16bit_imm, regmem, reg is vector decoded.
;; Convert it into imul reg, reg
;; It would be better to force assembler to encode instruction using long
;; immediate, but there is apparently no way to do so.
(define_peephole2
  [(parallel [(set (match_operand:SWI248 0 "register_operand")
		   (mult:SWI248
		    (match_operand:SWI248 1 "nonimmediate_operand")
		    (match_operand:SWI248 2 "const_int_operand")))
	      (clobber (reg:CC FLAGS_REG))])
   (match_scratch:SWI248 3 "r")]
  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
   && satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 2))
   (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
	      (clobber (reg:CC FLAGS_REG))])]
{
  if (!rtx_equal_p (operands[0], operands[1]))
    emit_move_insn (operands[0], operands[1]);
})

;; After splitting up read-modify operations, array accesses with memory
;; operands might end up in form:
;;  sall    $2, %eax
;;  movl    4(%esp), %edx
;;  addl    %edx, %eax
;; instead of pre-splitting:
;;  sall    $2, %eax
;;  addl    4(%esp), %eax
;; Turn it into:
;;  movl    4(%esp), %edx
;;  leal    (%edx,%eax,4), %eax

(define_peephole2
  [(match_scratch:W 5 "r")
   (parallel [(set (match_operand 0 "register_operand")
		   (ashift (match_operand 1 "register_operand")
			   (match_operand 2 "const_int_operand")))
	       (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand 3 "register_operand")
		   (plus (match_dup 0)
			 (match_operand 4 "x86_64_general_operand")))
		   (clobber (reg:CC FLAGS_REG))])]
  "IN_RANGE (INTVAL (operands[2]), 1, 3)
   /* Validate MODE for lea.  */
   && ((!TARGET_PARTIAL_REG_STALL
	&& (GET_MODE (operands[0]) == QImode
	    || GET_MODE (operands[0]) == HImode))
       || GET_MODE (operands[0]) == SImode
       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
   && (rtx_equal_p (operands[0], operands[3])
       || peep2_reg_dead_p (2, operands[0]))
   /* We reorder load and the shift.  */
   && !reg_overlap_mentioned_p (operands[0], operands[4])"
  [(set (match_dup 5) (match_dup 4))
   (set (match_dup 0) (match_dup 1))]
{
  machine_mode op1mode = GET_MODE (operands[1]);
  machine_mode mode = op1mode == DImode ? DImode : SImode;
  int scale = 1 << INTVAL (operands[2]);
  rtx index = gen_lowpart (word_mode, operands[1]);
  rtx base = gen_lowpart (word_mode, operands[5]);
  rtx dest = gen_lowpart (mode, operands[3]);

  operands[1] = gen_rtx_PLUS (word_mode, base,
			      gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
  if (mode != word_mode)
    operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);

  operands[5] = base;
  if (op1mode != word_mode)
    operands[5] = gen_lowpart (op1mode, operands[5]);

  operands[0] = dest;
})

;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
;; That, however, is usually mapped by the OS to SIGSEGV, which is often
;; caught for use by garbage collectors and the like.  Using an insn that
;; maps to SIGILL makes it more likely the program will rightfully die.
;; Keeping with tradition, "6" is in honor of #UD.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 6))]
  ""
{
#ifdef HAVE_AS_IX86_UD2
  return "ud2";
#else
  return ASM_SHORT "0x0b0f";
#endif
}
  [(set_attr "length" "2")])

(define_insn "ud2"
  [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
  ""
{
#ifdef HAVE_AS_IX86_UD2
  return "ud2";
#else
  return ASM_SHORT "0x0b0f";
#endif
}
  [(set_attr "length" "2")])

(define_expand "prefetch"
  [(prefetch (match_operand 0 "address_operand")
	     (match_operand:SI 1 "const_int_operand")
	     (match_operand:SI 2 "const_int_operand"))]
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  bool write = operands[1] != const0_rtx;
  int locality = INTVAL (operands[2]);

  gcc_assert (IN_RANGE (locality, 0, 3));

  /* Use 3dNOW prefetch in case we are asking for write prefetch not
     supported by SSE counterpart (non-SSE2 athlon machines) or the
     SSE prefetch is not available (K6 machines).  Otherwise use SSE
     prefetch as it allows specifying of locality.  */

  if (write)
    {
      if (TARGET_PREFETCHWT1)
	operands[2] = GEN_INT (MAX (locality, 2)); 
      else if (TARGET_PRFCHW)
	operands[2] = GEN_INT (3);
      else if (TARGET_3DNOW && !TARGET_SSE2)
	operands[2] = GEN_INT (3);
      else if (TARGET_PREFETCH_SSE)
	operands[1] = const0_rtx;
      else
	{
	  gcc_assert (TARGET_3DNOW);
	  operands[2] = GEN_INT (3);
	}
    }
  else
    {
      if (TARGET_PREFETCH_SSE)
	;
      else
	{
	  gcc_assert (TARGET_3DNOW);
	  operands[2] = GEN_INT (3);
	}
    }
})

(define_insn "*prefetch_sse"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (const_int 0)
	     (match_operand:SI 1 "const_int_operand"))]
  "TARGET_PREFETCH_SSE"
{
  static const char * const patterns[4] = {
   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
  };

  int locality = INTVAL (operands[1]);
  gcc_assert (IN_RANGE (locality, 0, 3));

  return patterns[locality];
}
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "prefetch")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (match_operand:SI 1 "const_int_operand" "n")
	     (const_int 3))]
  "TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  if (operands[1] == const0_rtx)
    return "prefetch\t%a0";
  else
    return "prefetchw\t%a0";
}
  [(set_attr "type" "mmx")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

(define_insn "*prefetch_prefetchwt1"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (const_int 1)
	     (const_int 2))]
  "TARGET_PREFETCHWT1"
  "prefetchwt1\t%a0";
  [(set_attr "type" "sse")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

(define_expand "stack_protect_set"
  [(match_operand 0 "memory_operand")
   (match_operand 1 "memory_operand")]
  ""
{
  emit_insn (gen_stack_protect_set_1
	     (ptr_mode, operands[0], operands[1]));
  DONE;
})

(define_insn "@stack_protect_set_1_<mode>"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
	(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
		    UNSPEC_SP_SET))
   (set (match_scratch:PTR 2 "=&r") (const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
  output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
  return "xor{l}\t%k2, %k2";
}
  [(set_attr "type" "multi")])

;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
;; immediately followed by *mov{s,d}i_internal to the same register,
;; where we can avoid the xor{l} above.  We don't split this, so that
;; scheduling or anything else doesn't separate the *stack_protect_set*
;; pattern from the set of the register that overwrites the register
;; with a new value.
(define_insn "*stack_protect_set_2_<mode>"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
	(unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
		    UNSPEC_SP_SET))
   (set (match_operand:SI 1 "register_operand" "=&r")
	(match_operand:SI 2 "general_operand" "g"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && !reg_overlap_mentioned_p (operands[1], operands[2])"
{
  output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
  output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
  if (pic_32bit_operand (operands[2], SImode)
      || ix86_use_lea_for_mov (insn, operands + 1))
    return "lea{l}\t{%E2, %1|%1, %E2}";
  else
    return "mov{l}\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

(define_peephole2
 [(parallel [(set (match_operand:PTR 0 "memory_operand")
		  (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
			      UNSPEC_SP_SET))
	     (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
	     (clobber (reg:CC FLAGS_REG))])
  (set (match_operand:SI 3 "general_reg_operand")
       (match_operand:SI 4))]
 "REGNO (operands[2]) == REGNO (operands[3])
  && general_operand (operands[4], SImode)
  && (general_reg_operand (operands[4], SImode)
      || memory_operand (operands[4], SImode)
      || immediate_operand (operands[4], SImode))
  && !reg_overlap_mentioned_p (operands[3], operands[4])"
 [(parallel [(set (match_dup 0)
		  (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
	     (set (match_dup 3) (match_dup 4))
	     (clobber (reg:CC FLAGS_REG))])])

(define_insn "*stack_protect_set_3"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
		   UNSPEC_SP_SET))
   (set (match_operand:DI 1 "register_operand" "=&r,r,r")
	(match_operand:DI 2 "general_operand" "Z,rem,i"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && reload_completed
   && !reg_overlap_mentioned_p (operands[1], operands[2])"
{
  output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
  if (pic_32bit_operand (operands[2], DImode))
    return "lea{q}\t{%E2, %1|%1, %E2}";
  else if (which_alternative == 0)
    return "mov{l}\t{%k2, %k1|%k1, %k2}";
  else if (which_alternative == 2)
    return "movabs{q}\t{%2, %1|%1, %2}";
  else if (ix86_use_lea_for_mov (insn, operands + 1))
    return "lea{q}\t{%E2, %1|%1, %E2}";
  else
    return "mov{q}\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

(define_peephole2
 [(parallel [(set (match_operand:DI 0 "memory_operand")
		  (unspec:DI [(match_operand:DI 1 "memory_operand")]
			     UNSPEC_SP_SET))
	     (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
	     (clobber (reg:CC FLAGS_REG))])
  (set (match_dup 2) (match_operand:DI 3))]
 "TARGET_64BIT
  && general_operand (operands[3], DImode)
  && (general_reg_operand (operands[3], DImode)
      || memory_operand (operands[3], DImode)
      || x86_64_zext_immediate_operand (operands[3], DImode)
      || x86_64_immediate_operand (operands[3], DImode)
      || (CONSTANT_P (operands[3])
	  && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
  && !reg_overlap_mentioned_p (operands[2], operands[3])"
 [(parallel [(set (match_dup 0)
		  (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
	     (set (match_dup 2) (match_dup 3))
	     (clobber (reg:CC FLAGS_REG))])])

(define_expand "stack_protect_test"
  [(match_operand 0 "memory_operand")
   (match_operand 1 "memory_operand")
   (match_operand 2)]
  ""
{
  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);

  emit_insn (gen_stack_protect_test_1
	     (ptr_mode, flags, operands[0], operands[1]));

  emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
				  flags, const0_rtx, operands[2]));
  DONE;
})

(define_insn "@stack_protect_test_1_<mode>"
  [(set (match_operand:CCZ 0 "flags_reg_operand")
	(unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
		     (match_operand:PTR 2 "memory_operand" "m")]
		    UNSPEC_SP_TEST))
   (clobber (match_scratch:PTR 3 "=&r"))]
  ""
{
  output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
  return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
}
  [(set_attr "type" "multi")])

(define_insn "sse4_2_crc32<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:SI 1 "register_operand" "0")
	   (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
	  UNSPEC_CRC32))]
  "TARGET_SSE4_2 || TARGET_CRC32"
  "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_data16")
     (if_then_else (match_operand:HI 2)
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_rex")
     (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" "SI")])

(define_insn "sse4_2_crc32di"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI
	  [(match_operand:DI 1 "register_operand" "0")
	   (match_operand:DI 2 "nonimmediate_operand" "rm")]
	  UNSPEC_CRC32))]
  "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)"
  "crc32{q}\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])

(define_insn "rdpmc"
  [(set (match_operand:DI 0 "register_operand" "=A")
  	(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
			    UNSPECV_RDPMC))]
  "!TARGET_64BIT"
  "rdpmc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

(define_insn "rdpmc_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
  	(unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
			    UNSPECV_RDPMC))
   (set (match_operand:DI 1 "register_operand" "=d")
	(unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
  "TARGET_64BIT"
  "rdpmc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

(define_insn "rdtsc"
  [(set (match_operand:DI 0 "register_operand" "=A")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
  "!TARGET_64BIT"
  "rdtsc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

(define_insn "rdtsc_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
   (set (match_operand:DI 1 "register_operand" "=d")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
  "TARGET_64BIT"
  "rdtsc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

(define_insn "rdtscp"
  [(set (match_operand:DI 0 "register_operand" "=A")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:SI 1 "register_operand" "=c")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
  "!TARGET_64BIT"
  "rdtscp"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "rdtscp_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:DI 1 "register_operand" "=d")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:SI 2 "register_operand" "=c")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
  "TARGET_64BIT"
  "rdtscp"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FXSR, XSAVE and XSAVEOPT instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_insn "fxsave"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
  "TARGET_FXSR"
  "fxsave\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "fxsave64"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
  "TARGET_64BIT && TARGET_FXSR"
  "fxsave64\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])

(define_insn "fxrstor"
  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
		    UNSPECV_FXRSTOR)]
  "TARGET_FXSR"
  "fxrstor\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "fxrstor64"
  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
		    UNSPECV_FXRSTOR64)]
  "TARGET_64BIT && TARGET_FXSR"
  "fxrstor64\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])

(define_int_iterator ANY_XSAVE
	[UNSPECV_XSAVE
	 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
	 (UNSPECV_XSAVEC "TARGET_XSAVEC")
	 (UNSPECV_XSAVES "TARGET_XSAVES")])

(define_int_iterator ANY_XSAVE64
	[UNSPECV_XSAVE64
	 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
	 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
	 (UNSPECV_XSAVES64 "TARGET_XSAVES")])

(define_int_attr xsave
	[(UNSPECV_XSAVE "xsave")
	 (UNSPECV_XSAVE64 "xsave64")
	 (UNSPECV_XSAVEOPT "xsaveopt")
	 (UNSPECV_XSAVEOPT64 "xsaveopt64")
	 (UNSPECV_XSAVEC "xsavec")
	 (UNSPECV_XSAVEC64 "xsavec64")
	 (UNSPECV_XSAVES "xsaves")
	 (UNSPECV_XSAVES64 "xsaves64")])

(define_int_iterator ANY_XRSTOR
	[UNSPECV_XRSTOR
	 (UNSPECV_XRSTORS "TARGET_XSAVES")])

(define_int_iterator ANY_XRSTOR64
	[UNSPECV_XRSTOR64
	 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])

(define_int_attr xrstor
	[(UNSPECV_XRSTOR "xrstor")
	 (UNSPECV_XRSTOR64 "xrstor")
	 (UNSPECV_XRSTORS "xrstors")
	 (UNSPECV_XRSTORS64 "xrstors")])

(define_insn "<xsave>"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK
	 [(match_operand:DI 1 "register_operand" "A")]
	 ANY_XSAVE))]
  "!TARGET_64BIT && TARGET_XSAVE"
  "<xsave>\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "<xsave>_rex64"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK
	 [(match_operand:SI 1 "register_operand" "a")
	  (match_operand:SI 2 "register_operand" "d")]
	 ANY_XSAVE))]
  "TARGET_64BIT && TARGET_XSAVE"
  "<xsave>\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "<xsave>"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK
	 [(match_operand:SI 1 "register_operand" "a")
	  (match_operand:SI 2 "register_operand" "d")]
	 ANY_XSAVE64))]
  "TARGET_64BIT && TARGET_XSAVE"
  "<xsave>\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])

(define_insn "<xrstor>"
   [(unspec_volatile:BLK
     [(match_operand:BLK 0 "memory_operand" "m")
      (match_operand:DI 1 "register_operand" "A")]
     ANY_XRSTOR)]
  "!TARGET_64BIT && TARGET_XSAVE"
  "<xrstor>\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "<xrstor>_rex64"
   [(unspec_volatile:BLK
     [(match_operand:BLK 0 "memory_operand" "m")
      (match_operand:SI 1 "register_operand" "a")
      (match_operand:SI 2 "register_operand" "d")]
     ANY_XRSTOR)]
  "TARGET_64BIT && TARGET_XSAVE"
  "<xrstor>\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "<xrstor>64"
   [(unspec_volatile:BLK
     [(match_operand:BLK 0 "memory_operand" "m")
      (match_operand:SI 1 "register_operand" "a")
      (match_operand:SI 2 "register_operand" "d")]
     ANY_XRSTOR64)]
  "TARGET_64BIT && TARGET_XSAVE"
  "<xrstor>64\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])

(define_insn "xsetbv"
  [(unspec_volatile:SI
	 [(match_operand:SI 0 "register_operand" "c")
	  (match_operand:DI 1 "register_operand" "A")]
	 UNSPECV_XSETBV)]
  "!TARGET_64BIT && TARGET_XSAVE"
  "xsetbv"
  [(set_attr "type" "other")])

(define_insn "xsetbv_rex64"
  [(unspec_volatile:SI
	 [(match_operand:SI 0 "register_operand" "c")
	  (match_operand:SI 1 "register_operand" "a")
	  (match_operand:SI 2 "register_operand" "d")]
	 UNSPECV_XSETBV)]
  "TARGET_64BIT && TARGET_XSAVE"
  "xsetbv"
  [(set_attr "type" "other")])

(define_insn "xgetbv"
  [(set (match_operand:DI 0 "register_operand" "=A")
  	(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
			    UNSPECV_XGETBV))]
  "!TARGET_64BIT && TARGET_XSAVE"
  "xgetbv"
  [(set_attr "type" "other")])

(define_insn "xgetbv_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
  	(unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
			    UNSPECV_XGETBV))
   (set (match_operand:DI 1 "register_operand" "=d")
	(unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
  "TARGET_64BIT && TARGET_XSAVE"
  "xgetbv"
  [(set_attr "type" "other")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Floating-point instructions for atomic compound assignments
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Clobber all floating-point registers on environment save and restore
; to ensure that the TOS value saved at fnstenv is valid after fldenv.
(define_insn "fnstenv"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
   (clobber (reg:XF ST0_REG))
   (clobber (reg:XF ST1_REG))
   (clobber (reg:XF ST2_REG))
   (clobber (reg:XF ST3_REG))
   (clobber (reg:XF ST4_REG))
   (clobber (reg:XF ST5_REG))
   (clobber (reg:XF ST6_REG))
   (clobber (reg:XF ST7_REG))]
  "TARGET_80387"
  "fnstenv\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])

(define_insn "fldenv"
  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
		    UNSPECV_FLDENV)
   (clobber (reg:XF ST0_REG))
   (clobber (reg:XF ST1_REG))
   (clobber (reg:XF ST2_REG))
   (clobber (reg:XF ST3_REG))
   (clobber (reg:XF ST4_REG))
   (clobber (reg:XF ST5_REG))
   (clobber (reg:XF ST6_REG))
   (clobber (reg:XF ST7_REG))]
  "TARGET_80387"
  "fldenv\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])

(define_insn "fnstsw"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
	(unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
  "TARGET_80387"
  "fnstsw\t%0"
  [(set_attr "type" "other,other")
   (set_attr "memory" "none,store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])

(define_insn "fnclex"
  [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
  "TARGET_80387"
  "fnclex"
  [(set_attr "type" "other")
   (set_attr "memory" "none")
   (set_attr "length" "2")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; LWP instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_expand "lwp_llwpcb"
  [(unspec_volatile [(match_operand 0 "register_operand")]
		    UNSPECV_LLWP_INTRINSIC)]
  "TARGET_LWP")

(define_insn "*lwp_llwpcb<mode>_1"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
		    UNSPECV_LLWP_INTRINSIC)]
  "TARGET_LWP"
  "llwpcb\t%0"
  [(set_attr "type" "lwp")
   (set_attr "mode" "<MODE>")
   (set_attr "length" "5")])

(define_expand "lwp_slwpcb"
  [(set (match_operand 0 "register_operand")
	(unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
  "TARGET_LWP"
  "emit_insn (gen_lwp_slwpcb_1 (Pmode, operands[0])); DONE;")

(define_insn "@lwp_slwpcb<mode>_1"
  [(set (match_operand:P 0 "register_operand" "=r")
	(unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
  "TARGET_LWP"
  "slwpcb\t%0"
  [(set_attr "type" "lwp")
   (set_attr "mode" "<MODE>")
   (set_attr "length" "5")])

(define_expand "lwp_lwpval<mode>3"
  [(unspec_volatile [(match_operand:SWI48 1 "register_operand")
    	    	     (match_operand:SI 2 "nonimmediate_operand")
		     (match_operand:SI 3 "const_int_operand")]
		    UNSPECV_LWPVAL_INTRINSIC)]
  "TARGET_LWP"
  ;; Avoid unused variable warning.
  "(void) operands[0];")

(define_insn "*lwp_lwpval<mode>3_1"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
    	    	     (match_operand:SI 1 "nonimmediate_operand" "rm")
		     (match_operand:SI 2 "const_int_operand" "i")]
		    UNSPECV_LWPVAL_INTRINSIC)]
  "TARGET_LWP"
  "lwpval\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "lwp")
   (set_attr "mode" "<MODE>")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])

(define_expand "lwp_lwpins<mode>3"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand")
			      (match_operand:SI 2 "nonimmediate_operand")
			      (match_operand:SI 3 "const_int_operand")]
			     UNSPECV_LWPINS_INTRINSIC))
   (set (match_operand:QI 0 "nonimmediate_operand")
	(eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
  "TARGET_LWP")

(define_insn "*lwp_lwpins<mode>3_1"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
			      (match_operand:SI 1 "nonimmediate_operand" "rm")
			      (match_operand:SI 2 "const_int_operand" "i")]
			     UNSPECV_LWPINS_INTRINSIC))]
  "TARGET_LWP"
  "lwpins\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "lwp")
   (set_attr "mode" "<MODE>")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])

(define_int_iterator RDFSGSBASE
	[UNSPECV_RDFSBASE
	 UNSPECV_RDGSBASE])

(define_int_iterator WRFSGSBASE
	[UNSPECV_WRFSBASE
	 UNSPECV_WRGSBASE])

(define_int_attr fsgs
	[(UNSPECV_RDFSBASE "fs")
	 (UNSPECV_RDGSBASE "gs")
	 (UNSPECV_WRFSBASE "fs")
	 (UNSPECV_WRGSBASE "gs")])

(define_insn "rd<fsgs>base<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
  "TARGET_64BIT && TARGET_FSGSBASE"
  "rd<fsgs>base\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_extra" "2")])

(define_insn "wr<fsgs>base<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
		    WRFSGSBASE)]
  "TARGET_64BIT && TARGET_FSGSBASE"
  "wr<fsgs>base\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_extra" "2")])

(define_insn "ptwrite<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
		    UNSPECV_PTWRITE)]
  "TARGET_PTWRITE"
  "ptwrite\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_extra" "2")])

(define_insn "rdrand<mode>_1"
  [(set (match_operand:SWI248 0 "register_operand" "=r")
	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
   (set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
  "TARGET_RDRND"
  "rdrand\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_extra" "1")])

(define_insn "rdseed<mode>_1"
  [(set (match_operand:SWI248 0 "register_operand" "=r")
	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
   (set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
  "TARGET_RDSEED"
  "rdseed\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_extra" "1")])

(define_expand "pause"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Use "rep; nop", instead of "pause", to support older assemblers.
;; They have the same encoding.
(define_insn "*pause"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
  ""
  "rep%; nop"
  [(set_attr "length" "2")
   (set_attr "memory" "unknown")])

;; CET instructions
(define_insn "rdssp<mode>"
  [(set (match_operand:SWI48x 0 "register_operand" "=r")
	(unspec_volatile:SWI48x [(const_int 0)] UNSPECV_NOP_RDSSP))]
  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
  "xor{l}\t%k0, %k0\n\trdssp<mskmodesuffix>\t%0"
  [(set_attr "length" "6")
   (set_attr "type" "other")])

(define_insn "incssp<mode>"
  [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")]
		    UNSPECV_INCSSP)]
  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
  "incssp<mskmodesuffix>\t%0"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "saveprevssp"
  [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
  "TARGET_SHSTK"
  "saveprevssp"
  [(set_attr "length" "5")
   (set_attr "type" "other")])

(define_expand "rstorssp"
  [(unspec_volatile [(match_operand 0 "memory_operand")]
		    UNSPECV_RSTORSSP)]
  "TARGET_SHSTK")

(define_insn "*rstorssp<mode>"
  [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")]
		    UNSPECV_RSTORSSP)]
  "TARGET_SHSTK"
  "rstorssp\t%0"
  [(set_attr "length" "5")
   (set_attr "type" "other")])

(define_insn "wrss<mode>"
  [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")
		     (match_operand:SWI48x 1 "memory_operand" "m")]
		    UNSPECV_WRSS)]
  "TARGET_SHSTK"
  "wrss<mskmodesuffix>\t%0, %1"
  [(set_attr "length" "3")
   (set_attr "type" "other")])

(define_insn "wruss<mode>"
  [(unspec_volatile [(match_operand:SWI48x 0 "register_operand" "r")
		     (match_operand:SWI48x 1 "memory_operand" "m")]
		    UNSPECV_WRUSS)]
  "TARGET_SHSTK"
  "wruss<mskmodesuffix>\t%0, %1"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "setssbsy"
  [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
  "TARGET_SHSTK"
  "setssbsy"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_expand "clrssbsy"
  [(unspec_volatile [(match_operand 0 "memory_operand")]
		    UNSPECV_CLRSSBSY)]
  "TARGET_SHSTK")

(define_insn "*clrssbsy<mode>"
  [(unspec_volatile [(match_operand:P 0 "memory_operand" "m")]
		    UNSPECV_CLRSSBSY)]
  "TARGET_SHSTK"
  "clrssbsy\t%0"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "nop_endbr"
  [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
  "(flag_cf_protection & CF_BRANCH)"
{
  return TARGET_64BIT ? "endbr64" : "endbr32";
}
  [(set_attr "length" "4")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; For RTM support
(define_expand "xbegin"
  [(set (match_operand:SI 0 "register_operand")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
  "TARGET_RTM"
{
  rtx_code_label *label = gen_label_rtx ();

  /* xbegin is emitted as jump_insn, so reload won't be able
     to reload its operand.  Force the value into AX hard register.  */
  rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
  emit_move_insn (ax_reg, constm1_rtx);

  emit_jump_insn (gen_xbegin_1 (ax_reg, label));

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operands[0], ax_reg);

  DONE;
})

(define_insn "xbegin_1"
  [(set (pc)
	(if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
			  (const_int 0))
		      (label_ref (match_operand 1))
		      (pc)))
   (set (match_operand:SI 0 "register_operand" "+a")
	(unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
  "TARGET_RTM"
  "xbegin\t%l1"
  [(set_attr "type" "other")
   (set_attr "length" "6")])

(define_insn "xend"
  [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
  "TARGET_RTM"
  "xend"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "xabort"
  [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand" "n")]
		    UNSPECV_XABORT)]
  "TARGET_RTM"
  "xabort\t%0"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_expand "xtest"
  [(set (match_operand:QI 0 "register_operand")
	(unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
  "TARGET_RTM"
{
  emit_insn (gen_xtest_1 ());

  ix86_expand_setcc (operands[0], NE,
		     gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
  DONE;
})

(define_insn "xtest_1"
  [(set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
  "TARGET_RTM"
  "xtest"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "clwb"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                   UNSPECV_CLWB)]
  "TARGET_CLWB"
  "clwb\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "clflushopt"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                   UNSPECV_CLFLUSHOPT)]
  "TARGET_CLFLUSHOPT"
  "clflushopt\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; MONITORX and MWAITX
(define_insn "mwaitx"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
		     (match_operand:SI 1 "register_operand" "a")
		     (match_operand:SI 2 "register_operand" "b")]
		   UNSPECV_MWAITX)]
  "TARGET_MWAITX"
;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwaitx"
  [(set_attr "length" "3")])

(define_insn "@monitorx_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		   UNSPECV_MONITORX)]
  "TARGET_MWAITX"
;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "%^monitorx"
  [(set (attr "length")
     (symbol_ref ("(Pmode != word_mode) + 3")))])

;; CLZERO
(define_insn "@clzero_<mode>"
  [(unspec_volatile [(match_operand: P 0 "register_operand" "a")]
                   UNSPECV_CLZERO)]
  "TARGET_CLZERO"
  "clzero"
  [(set_attr "length" "3")
  (set_attr "memory" "unknown")])

;; RDPKRU and WRPKRU

(define_expand "rdpkru"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
      (set (match_dup 2) (const_int 0))])]
  "TARGET_PKU"
{
  operands[1] = force_reg (SImode, const0_rtx);
  operands[2] = gen_reg_rtx (SImode);
})

(define_insn "*rdpkru"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
			    UNSPECV_PKU))
   (set (match_operand:SI 1 "register_operand" "=d")
	(const_int 0))]
  "TARGET_PKU"
  "rdpkru"
  [(set_attr "type" "other")])

(define_expand "wrpkru"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "register_operand")
      (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
  "TARGET_PKU"
{
  operands[1] = force_reg (SImode, const0_rtx);
  operands[2] = force_reg (SImode, const0_rtx);
})

(define_insn "*wrpkru"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "d")
      (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
  "TARGET_PKU"
  "wrpkru"
  [(set_attr "type" "other")])

(define_insn "rdpid"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
  "!TARGET_64BIT && TARGET_RDPID"
  "rdpid\t%0"
  [(set_attr "type" "other")])

(define_insn "rdpid_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
  "TARGET_64BIT && TARGET_RDPID"
  "rdpid\t%0"
  [(set_attr "type" "other")])

;; Intirinsics for > i486

(define_insn "wbinvd"
  [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
  ""
  "wbinvd"
  [(set_attr "type" "other")])

(define_insn "wbnoinvd"
  [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
  "TARGET_WBNOINVD"
  "wbnoinvd"
  [(set_attr "type" "other")])

;; MOVDIRI and MOVDIR64B

(define_insn "movdiri<mode>"
  [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m")
			   (match_operand:SWI48 1 "register_operand" "r")]
			  UNSPECV_MOVDIRI)]
  "TARGET_MOVDIRI"
  "movdiri\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

(define_insn "@movdir64b_<mode>"
  [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
			(match_operand:XI 1 "memory_operand")]
		       UNSPECV_MOVDIR64B)]
  "TARGET_MOVDIR64B"
  "movdir64b\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

;; ENQCMD and ENQCMDS

(define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
(define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])

(define_insn "@enqcmd<enqcmd_sfx>_<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
			      (match_operand:XI 1 "memory_operand" "m")]
			     ENQCMD))]
  "TARGET_ENQCMD"
  "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

;; WAITPKG

(define_insn "umwait"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
			      (match_operand:DI 1 "register_operand" "A")]
			     UNSPECV_UMWAIT))]
  "!TARGET_64BIT && TARGET_WAITPKG"
  "umwait\t%0"
  [(set_attr "length" "3")])

(define_insn "umwait_rex64"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
			      (match_operand:SI 1 "register_operand" "a")
			      (match_operand:SI 2 "register_operand" "d")]
			     UNSPECV_UMWAIT))]
  "TARGET_64BIT && TARGET_WAITPKG"
  "umwait\t%0"
  [(set_attr "length" "3")])

(define_insn "@umonitor_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
		    UNSPECV_UMONITOR)]
  "TARGET_WAITPKG"
  "umonitor\t%0"
  [(set (attr "length")
     (symbol_ref ("(Pmode != word_mode) + 3")))])

(define_insn "tpause"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
			      (match_operand:DI 1 "register_operand" "A")]
			     UNSPECV_TPAUSE))]
  "!TARGET_64BIT && TARGET_WAITPKG"
  "tpause\t%0"
  [(set_attr "length" "3")])

(define_insn "tpause_rex64"
  [(set (reg:CCC FLAGS_REG)
	(unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
			      (match_operand:SI 1 "register_operand" "a")
			      (match_operand:SI 2 "register_operand" "d")]
			     UNSPECV_TPAUSE))]
  "TARGET_64BIT && TARGET_WAITPKG"
  "tpause\t%0"
  [(set_attr "length" "3")])

(define_insn "cldemote"
  [(unspec_volatile[(match_operand 0 "address_operand" "p")]
		 UNSPECV_CLDEMOTE)]
  "TARGET_CLDEMOTE"
  "cldemote\t%a0"
  [(set_attr "type" "other")
   (set_attr "memory" "unknown")])

(define_insn "speculation_barrier"
  [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
  ""
  "lfence"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(include "mmx.md")
(include "sse.md")
(include "sync.md")