Mercurial > hg > CbC > CbC_gcc
diff gcc/config/i386/sse.md @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | 3bfb6c00c1e0 |
children | b7f97abdc517 |
line wrap: on
line diff
--- a/gcc/config/i386/sse.md Sun Feb 07 18:28:00 2010 +0900 +++ b/gcc/config/i386/sse.md Fri Feb 12 23:39:51 2010 +0900 @@ -19,12 +19,12 @@ ;; <http://www.gnu.org/licenses/>. -;; 16 byte integral modes handled by SSE, minus TImode, which gets -;; special-cased for TARGET_64BIT. +;; 16 byte integral modes handled by SSE (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI]) ;; All 16-byte vector modes handled by SSE (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF]) ;; 32 byte integral vector modes handled by AVX (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI]) @@ -39,7 +39,10 @@ (define_mode_iterator AVXMODEDI [V4DI V2DI]) ;; All vector modes handled by AVX -(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) +(define_mode_iterator AVXMODE + [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) +(define_mode_iterator AVXMODE16 + [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) ;; Mix-n-match (define_mode_iterator SSEMODE12 [V16QI V8HI]) @@ -49,14 +52,18 @@ (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI]) (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) +(define_mode_iterator FMA4MODEF4 [V8SF V4DF]) (define_mode_iterator SSEMODEF2P [V4SF V2DF]) (define_mode_iterator AVX256MODEF2P [V8SF V4DF]) (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) +(define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF]) (define_mode_iterator AVX256MODE4P [V4DI V4DF]) (define_mode_iterator AVX256MODE8P [V8SI V8SF]) (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) (define_mode_iterator AVXMODEF4P [V4SF V4DF]) +(define_mode_iterator AVXMODEFDP [V2DF V4DF]) +(define_mode_iterator AVXMODEFSP [V4SF V8SF]) (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF]) (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI]) @@ -66,7 +73,15 @@ ;; Modes handled by 
integer vcond pattern (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI - (V2DI "TARGET_SSE4_2 || TARGET_SSE5")]) + (V2DI "TARGET_SSE4_2")]) + +;; Modes handled by vec_extract_even/odd pattern. +(define_mode_iterator SSEMODE_EO + [(V4SF "TARGET_SSE") + (V2DF "TARGET_SSE2") + (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2") + (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2") + (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) ;; Mapping from float mode to required SSE level (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) @@ -74,14 +89,20 @@ ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) -;; Mapping of the sse5 suffix +;; Mapping of the fma4 suffix +(define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")]) +(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") + (V4SF "ss") (V2DF "sd")]) + +;; Mapping of the avx suffix (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")]) -(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") - (V4SF "ss") (V2DF "sd")]) + (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) -;; Mapping of the max integer size for sse5 rotate immediate constraint +(define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")]) + +;; Mapping of the max integer size for xop rotate immediate constraint (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) ;; Mapping of vector modes back to the scalar modes @@ -90,39 +111,41 @@ (V4SI "SI") (V2DI "DI")]) ;; Mapping of vector modes to a vector mode of double size -(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI") - (V4SF "V8SF") (V4SI "V8SI")]) +(define_mode_attr ssedoublesizemode + [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI") + (V8HI "V16HI") (V16QI "V32QI") + (V4DF "V8DF") (V8SF "V16SF") + (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")]) ;; Number of scalar elements in each vector type -(define_mode_attr 
ssescalarnum [(V4SF "4") (V2DF "2") - (V16QI "16") (V8HI "8") - (V4SI "4") (V2DI "2")]) +(define_mode_attr ssescalarnum + [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") + (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")]) ;; Mapping for AVX (define_mode_attr avxvecmode - [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF") - (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") - (V8SF "V8SF") (V4DF "V4DF")]) + [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") + (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF") + (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")]) (define_mode_attr avxvecpsmode [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF") (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")]) (define_mode_attr avxhalfvecmode - [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") - (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")]) + [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") + (V8SF "V4SF") (V4DF "V2DF") + (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")]) (define_mode_attr avxscalarmode - [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF") - (V8SF "SF") (V4DF "DF")]) + [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF") + (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")]) (define_mode_attr avxcvtvecmode [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")]) (define_mode_attr avxpermvecmode [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) (define_mode_attr avxmodesuffixf2c - [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")]) + [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")]) (define_mode_attr avxmodesuffixp [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si") (V4DF "pd")]) -(define_mode_attr avxmodesuffixs - [(V16QI "b") (V8HI "w") (V4SI "d")]) (define_mode_attr avxmodesuffix [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") (V8SI "256") (V8SF 
"256") (V4DF "256")]) @@ -131,10 +154,6 @@ (define_mode_attr blendbits [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) -;; Mapping of immediate bits for vpermil instructions -(define_mode_attr vpermilbits - [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")]) - ;; Mapping of immediate bits for pinsr instructions (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")]) @@ -156,8 +175,8 @@ }) (define_insn "*avx_mov<mode>_internal" - [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] "TARGET_AVX && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -191,8 +210,8 @@ ;; This is essential for maintaining stable calling conventions. (define_expand "mov<mode>" - [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") - (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] + [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") + (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (<MODE>mode, operands); @@ -200,8 +219,8 @@ }) (define_insn "*mov<mode>_internal" - [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] + [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] "TARGET_SSE && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -267,12 +286,14 @@ gen_rtx_SUBREG (SImode, operands[1], 0))); emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), gen_rtx_SUBREG (SImode, operands[1], 4))); - emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2])); + emit_insn (gen_vec_interleave_lowv4si 
(operands[0], operands[0], + operands[2])); } else if (memory_operand (operands[1], DImode)) - emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx)); + emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), + operands[1], const0_rtx)); else - gcc_unreachable (); + gcc_unreachable (); }) (define_split @@ -308,7 +329,7 @@ }) (define_expand "push<mode>1" - [(match_operand:SSEMODE 0 "register_operand" "")] + [(match_operand:SSEMODE16 0 "register_operand" "")] "TARGET_SSE" { ix86_expand_push (<MODE>mode, operands[0]); @@ -325,8 +346,8 @@ }) (define_expand "movmisalign<mode>" - [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") - (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] + [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") + (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move_misalign (<MODE>mode, operands); @@ -342,6 +363,7 @@ && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -367,6 +389,7 @@ && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "mode" "<MODE>")]) (define_insn "avx_movdqu<avxmodesuffix>" @@ -377,6 +400,7 @@ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "vmovdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")]) @@ -387,6 +411,7 @@ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "movdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -428,7 +453,7 @@ UNSPEC_MOVNT))] "TARGET_SSE2" "movntdq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" 
"1") (set_attr "mode" "TI")]) @@ -438,7 +463,8 @@ UNSPEC_MOVNT))] "TARGET_SSE2" "movnti\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") + (set_attr "prefix_data16" "0") (set_attr "mode" "V2DF")]) (define_insn "avx_lddqu<avxmodesuffix>" @@ -449,6 +475,7 @@ "TARGET_AVX" "vlddqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") + (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")]) @@ -458,7 +485,9 @@ UNSPEC_LDDQU))] "TARGET_SSE3" "lddqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") + (set_attr "movu" "1") + (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "1") (set_attr "mode" "TI")]) @@ -649,7 +678,7 @@ { ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands); - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { @@ -765,6 +794,7 @@ "TARGET_SSE" "%vrcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) @@ -791,6 +821,7 @@ "TARGET_SSE" "rcpss\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "mode" "SF")]) (define_expand "sqrtv8sf2" @@ -798,7 +829,7 @@ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))] "TARGET_AVX" { - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { @@ -836,6 +867,7 @@ "TARGET_SSE" "%vsqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) @@ -880,6 +912,7 @@ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "mode" 
"<ssescalarmode>")]) (define_expand "rsqrtv8sf2" @@ -1043,7 +1076,7 @@ (const_int 1)))] "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") + [(set_attr "type" "sseadd") (set_attr "mode" "<ssescalarmode>")]) ;; These versions of the min/max patterns implement exactly the operations @@ -1179,6 +1212,7 @@ "TARGET_SSE3" "addsubpd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") (set_attr "mode" "V2DF")]) (define_insn "avx_h<plusminus_insn>v4df3" @@ -1302,6 +1336,7 @@ "TARGET_SSE3" "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_rep" "1") (set_attr "mode" "V4SF")]) @@ -1401,6 +1436,7 @@ "TARGET_AVX" "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -1417,6 +1453,7 @@ "TARGET_AVX" "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<ssescalarmode>")]) @@ -1431,6 +1468,7 @@ "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") (set_attr "mode" "<avxvecmode>")]) (define_insn "<sse>_maskcmp<mode>3" @@ -1438,10 +1476,11 @@ (match_operator:SSEMODEF4 3 "sse_comparison_operator" [(match_operand:SSEMODEF4 1 "register_operand" "0") (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode)) - && !TARGET_SSE5" + "!TARGET_XOP + && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vmmaskcmp<mode>3" @@ -1452,9 +1491,10 @@ 
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") (set_attr "mode" "<ssescalarmode>")]) (define_insn "<sse>_comi" @@ -1470,6 +1510,11 @@ "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") (set_attr "prefix" "maybe_vex") + (set_attr "prefix_rep" "0") + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "DF") + (const_string "1") + (const_string "0"))) (set_attr "mode" "<MODE>")]) (define_insn "<sse>_ucomi" @@ -1485,6 +1530,11 @@ "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") (set_attr "prefix" "maybe_vex") + (set_attr "prefix_rep" "0") + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "DF") + (const_string "1") + (const_string "0"))) (set_attr "mode" "<MODE>")]) (define_expand "vcond<mode>" @@ -1533,7 +1583,7 @@ (define_expand "<code><mode>3" [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") - (plogic:AVX256MODEF2P + (any_logic:AVX256MODEF2P (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)" @@ -1541,19 +1591,19 @@ (define_insn "*avx_<code><mode>3" [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (plogic:AVXMODEF2P + (any_logic:AVXMODEF2P (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" + "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")]) (define_expand "<code><mode>3" [(set (match_operand:SSEMODEF2P 0 
"register_operand" "") - (plogic:SSEMODEF2P + (any_logic:SSEMODEF2P (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" @@ -1561,15 +1611,33 @@ (define_insn "*<code><mode>3" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (plogic:SSEMODEF2P + (any_logic:SSEMODEF2P (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}" + "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "<MODE>")]) +(define_expand "copysign<mode>3" + [(set (match_dup 4) + (and:SSEMODEF2P + (not:SSEMODEF2P (match_dup 3)) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))) + (set (match_dup 5) + (and:SSEMODEF2P (match_dup 3) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" ""))) + (set (match_operand:SSEMODEF2P 0 "register_operand" "") + (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))] + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" +{ + operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0); + + operands[4] = gen_reg_rtx (<MODE>mode); + operands[5] = gen_reg_rtx (<MODE>mode); +}) + ;; Also define scalar versions. These are used for abs, neg, and ;; conditional move. Using subregs into vector modes causes register ;; allocation lossage. 
These patterns do not allow memory operands @@ -1600,29 +1668,30 @@ (define_insn "*avx_<code><mode>3" [(set (match_operand:MODEF 0 "register_operand" "=x") - (plogic:MODEF + (any_logic:MODEF (match_operand:MODEF 1 "register_operand" "x") (match_operand:MODEF 2 "register_operand" "x")))] "AVX_FLOAT_MODE_P (<MODE>mode)" - "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" + "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "<ssevecmode>")]) (define_insn "*<code><mode>3" [(set (match_operand:MODEF 0 "register_operand" "=x") - (plogic:MODEF + (any_logic:MODEF (match_operand:MODEF 1 "register_operand" "0") (match_operand:MODEF 2 "register_operand" "x")))] "SSE_FLOAT_MODE_P (<MODE>mode)" - "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}" + "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "<ssevecmode>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; SSE5 floating point multiply/accumulate instructions This includes the -;; scalar version of the instructions as well as the vector +;; FMA4 floating point multiply/accumulate instructions. This +;; includes the scalar version of the instructions as well as the +;; vector. 
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1637,547 +1706,700 @@ ;; (set (reg2) (mult (reg1) (mem (addr2)))) ;; (set (reg3) (plus (reg2) (mem (addr3)))) -(define_insn "sse5_fmadd<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") - (plus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" +(define_insn "fma4_fmadd<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (plus:FMA4MODEF4 + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; Floating multiply and subtract. +(define_insn "fma4_fmsub<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (minus:FMA4MODEF4 + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Split fmadd with two memory operands into a load and the fmadd. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") +;; Floating point negative multiply and add. +;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). 
+(define_insn "fma4_fnmadd<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (minus:FMA4MODEF4 + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; Floating point negative multiply and subtract. +(define_insn "fma4_fnmsub<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (minus:FMA4MODEF4 + (mult:FMA4MODEF4 + (neg:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "fma4_fmadd<mode>4" + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (plus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) + (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF4 3 "nonimmediate_operand" 
"xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) ;; For the scalar operations, use operand1 for the upper words that aren't ;; modified, so restrict the forms that are generated. -;; Scalar version of fmadd -(define_insn "sse5_vmfmadd<mode>4" +;; Scalar version of fmadd. +(define_insn "fma4_vmfmadd<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P (plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) + (match_dup 0) (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Floating multiply and subtract -;; Allow two memory operands the same as fmadd -(define_insn "sse5_fmsub<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") +;; Floating multiply and subtract. +;; Allow two memory operands the same as fmadd. 
+(define_insn "fma4_fmsub<mode>4" + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (minus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Split fmsub with two memory operands into a load and the fmsub. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - ;; For the scalar operations, use operand1 for the upper words that aren't ;; modified, so restrict the forms that are generated. -;; Scalar version of fmsub -(define_insn "sse5_vmfmsub<mode>4" +;; Scalar version of fmsub. 
+(define_insn "fma4_vmfmsub<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P (minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) + (match_dup 0) (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Floating point negative multiply and add -;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -;; Allow two memory operands to help in optimizing. -(define_insn "sse5_fnmadd<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") +;; Floating point negative multiply and add. +;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). 
+(define_insn "fma4_fnmadd<mode>4" + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0") + (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Split fnmadd with two memory operands into a load and the fnmadd. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "") - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - ;; For the scalar operations, use operand1 for the upper words that aren't ;; modified, so restrict the forms that are generated. -;; Scalar version of fnmadd -(define_insn "sse5_vmfnmadd<mode>4" +;; Scalar version of fnmadd. 
+(define_insn "fma4_vmfnmadd<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P (minus:SSEMODEF2P (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) - (match_dup 1) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) + (match_dup 0) (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Floating point negative multiply and subtract -;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c -;; Allow 2 memory operands to help with optimization -(define_insn "sse5_fnmsub<mode>4" +;; Floating point negative multiply and subtract. +;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c. +(define_insn "fma4_fnmsub<mode>4" [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (minus:SSEMODEF4 (mult:SSEMODEF4 (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" - "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Split fnmsub with two memory operands into a load and the fmsub. 
-(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - ;; For the scalar operations, use operand1 for the upper words that aren't ;; modified, so restrict the forms that are generated. -;; Scalar version of fnmsub -(define_insn "sse5_vmfnmsub<mode>4" +;; Scalar version of fnmsub. 
+(define_insn "fma4_vmfnmsub<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P (minus:SSEMODEF2P (mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) + (match_dup 0) (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" - "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "fma4i_fmadd<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (unspec:FMA4MODEF4 + [(plus:FMA4MODEF4 + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; The same instructions using an UNSPEC to allow the intrinsic to be used -;; even if the user used -mno-fused-madd -;; Parallel instructions. During instruction generation, just default -;; to registers, and let combine later build the appropriate instruction. 
-(define_expand "sse5i_fmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") +(define_insn "fma4i_fmsub<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (unspec:FMA4MODEF4 + [(minus:FMA4MODEF4 + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "fma4i_fnmadd<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (unspec:FMA4MODEF4 + [(minus:FMA4MODEF4 + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") + (mult:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "fma4i_fnmsub<mode>4256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") + (unspec:FMA4MODEF4 + [(minus:FMA4MODEF4 + (mult:FMA4MODEF4 + (neg:FMA4MODEF4 + (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "fma4i_fmadd<mode>4" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - 
(match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_expand "sse5i_fmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") +(define_insn "fma4i_fmsub<mode>4" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - 
(unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -(define_expand "sse5i_fnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") +(define_insn "fma4i_fnmadd<mode>4" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "register_operand" "") + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 
"nonimmediate_operand" "x,xm,xm,x")))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c -(define_expand "sse5i_fnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") +(define_insn "fma4i_fnmsub<mode>4" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(minus:SSEMODEF2P (mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "")) - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" 
"xm,x"))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Scalar instructions -(define_expand "sse5i_vmfmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 1) - (const_int 0))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - ;; For the scalar operations, use operand1 for the upper words that aren't ;; modified, so restrict the forms that are accepted. -(define_insn "*sse5i_vmfmadd<mode>4" +(define_insn "fma4i_vmfmadd<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(vec_merge:SSEMODEF2P (plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) - (const_int 0))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (const_int 1))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<ssescalarmode>")]) -(define_expand "sse5i_vmfmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 
"register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 0) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfmsub<mode>4" +(define_insn "fma4i_vmfmsub<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(vec_merge:SSEMODEF2P (minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) + (match_dup 0) (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<ssescalarmode>")]) -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -(define_expand "sse5i_vmfnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "register_operand" "") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" ""))) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we 
have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfnmadd<mode>4" +(define_insn "fma4i_vmfnmadd<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(vec_merge:SSEMODEF2P (minus:SSEMODEF2P (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) - (match_dup 1) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) + (match_dup 0) (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<ssescalarmode>")]) -(define_expand "sse5i_vmfnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "")) - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfnmsub<mode>4" +(define_insn "fma4i_vmfnmsub<mode>4" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (unspec:SSEMODEF2P [(vec_merge:SSEMODEF2P (minus:SSEMODEF2P 
(mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) + (match_dup 0) (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<ssescalarmode>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; +;; FMA4 Parallel floating point multiply addsub and subadd operations. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "fma4_fmaddsubv8sf4" + [(set (match_operand:V8SF 0 "register_operand" "=x,x") + (vec_merge:V8SF + (plus:V8SF + (mult:V8SF + (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) + (minus:V8SF + (mult:V8SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 170)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V8SF")]) + +(define_insn "fma4_fmaddsubv4df4" + [(set (match_operand:V4DF 0 "register_operand" "=x,x") + (vec_merge:V4DF + (plus:V4DF + (mult:V4DF + (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) + (minus:V4DF + (mult:V4DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 10)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, 
%3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4DF")]) + +(define_insn "fma4_fmaddsubv4sf4" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (plus:V4SF + (mult:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) + (minus:V4SF + (mult:V4SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 10)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fma4_fmaddsubv2df4" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (plus:V2DF + (mult:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) + (minus:V2DF + (mult:V2DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 2)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V2DF")]) + +(define_insn "fma4_fmsubaddv8sf4" + [(set (match_operand:V8SF 0 "register_operand" "=x,x") + (vec_merge:V8SF + (plus:V8SF + (mult:V8SF + (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) + (minus:V8SF + (mult:V8SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 85)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V8SF")]) + +(define_insn "fma4_fmsubaddv4df4" + [(set (match_operand:V4DF 0 "register_operand" "=x,x") + (vec_merge:V4DF + (plus:V4DF + (mult:V4DF + (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4DF 3 
"nonimmediate_operand" "xm,x")) + (minus:V4DF + (mult:V4DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 5)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4DF")]) + +(define_insn "fma4_fmsubaddv4sf4" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (plus:V4SF + (mult:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) + (minus:V4SF + (mult:V4SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 5)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fma4_fmsubaddv2df4" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (plus:V2DF + (mult:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) + (minus:V2DF + (mult:V2DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 1)))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V2DF")]) + +(define_insn "fma4i_fmaddsubv8sf4" + [(set (match_operand:V8SF 0 "register_operand" "=x,x") + (unspec:V8SF + [(vec_merge:V8SF + (plus:V8SF + (mult:V8SF + (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) + (minus:V8SF + (mult:V8SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 170))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V8SF")]) + +(define_insn 
"fma4i_fmaddsubv4df4" + [(set (match_operand:V4DF 0 "register_operand" "=x,x") + (unspec:V4DF + [(vec_merge:V4DF + (plus:V4DF + (mult:V4DF + (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) + (minus:V4DF + (mult:V4DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 10))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4DF")]) + +(define_insn "fma4i_fmaddsubv4sf4" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (unspec:V4SF + [(vec_merge:V4SF + (plus:V4SF + (mult:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) + (minus:V4SF + (mult:V4SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 10))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fma4i_fmaddsubv2df4" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (unspec:V2DF + [(vec_merge:V2DF + (plus:V2DF + (mult:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) + (minus:V2DF + (mult:V2DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 2))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V2DF")]) + +(define_insn "fma4i_fmsubaddv8sf4" + [(set (match_operand:V8SF 0 "register_operand" "=x,x") + (unspec:V8SF + [(vec_merge:V8SF + (plus:V8SF + (mult:V8SF + (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V8SF 2 
"nonimmediate_operand" "x,m")) + (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) + (minus:V8SF + (mult:V8SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 85))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V8SF")]) + +(define_insn "fma4i_fmsubaddv4df4" + [(set (match_operand:V4DF 0 "register_operand" "=x,x") + (unspec:V4DF + [(vec_merge:V4DF + (plus:V4DF + (mult:V4DF + (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) + (minus:V4DF + (mult:V4DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 5))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4DF")]) + +(define_insn "fma4i_fmsubaddv4sf4" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (unspec:V4SF + [(vec_merge:V4SF + (plus:V4SF + (mult:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) + (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) + (minus:V4SF + (mult:V4SF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 5))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 && TARGET_FUSED_MADD" + "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fma4i_fmsubaddv2df4" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (unspec:V2DF + [(vec_merge:V2DF + (plus:V2DF + (mult:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) + (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) + (minus:V2DF + (mult:V2DF + (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (const_int 1))] + UNSPEC_FMA4_INTRINSIC))] + "TARGET_FMA4 
&& TARGET_FUSED_MADD" + "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "V2DF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Parallel single-precision floating point conversion operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2215,6 +2437,7 @@ "cvttps2pi\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "unit" "mmx") + (set_attr "prefix_rep" "0") (set_attr "mode" "SF")]) (define_insn "*avx_cvtsi2ss" @@ -2254,6 +2477,7 @@ "TARGET_AVX && TARGET_64BIT" "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseicvt") + (set_attr "length_vex" "4") (set_attr "prefix" "vex") (set_attr "mode" "SF")]) @@ -2267,6 +2491,7 @@ "TARGET_SSE && TARGET_64BIT" "cvtsi2ssq\t{%2, %0|%0, %2}" [(set_attr "type" "sseicvt") + (set_attr "prefix_rex" "1") (set_attr "athlon_decode" "vector,double") (set_attr "amdfam10_decode" "vector,double") (set_attr "mode" "SF")]) @@ -2375,6 +2600,31 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) +(define_expand "sse2_cvtudq2ps" + [(set (match_dup 5) + (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" ""))) + (set (match_dup 6) + (lt:V4SF (match_dup 5) (match_dup 3))) + (set (match_dup 7) + (and:V4SF (match_dup 6) (match_dup 4))) + (set (match_operand:V4SF 0 "register_operand" "") + (plus:V4SF (match_dup 5) (match_dup 7)))] + "TARGET_SSE2" +{ + REAL_VALUE_TYPE TWO32r; + rtx x; + int i; + + real_ldexp (&TWO32r, &dconst1, 32); + x = const_double_from_real_value (TWO32r, SFmode); + + operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); + operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x)); + + for (i = 5; i < 8; i++) + operands[i] = gen_reg_rtx (V4SFmode); +}) + (define_insn "avx_cvtps2dq<avxmodesuffix>" [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") (unspec:AVXMODEDCVTPS2DQ @@ -2413,6 +2663,7 @@ "cvttps2dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr 
"prefix_rep" "1") + (set_attr "prefix_data16" "0") (set_attr "mode" "TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2428,6 +2679,7 @@ "cvtpi2pd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "unit" "mmx,*") + (set_attr "prefix_data16" "1,*") (set_attr "mode" "V2DF")]) (define_insn "sse2_cvtpd2pi" @@ -2488,6 +2740,7 @@ "TARGET_AVX && TARGET_64BIT" "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseicvt") + (set_attr "length_vex" "4") (set_attr "prefix" "vex") (set_attr "mode" "DF")]) @@ -2501,6 +2754,7 @@ "TARGET_SSE2 && TARGET_64BIT" "cvtsi2sdq\t{%2, %0|%0, %2}" [(set_attr "type" "sseicvt") + (set_attr "prefix_rex" "1") (set_attr "mode" "DF") (set_attr "athlon_decode" "double,direct") (set_attr "amdfam10_decode" "vector,double")]) @@ -2642,6 +2896,7 @@ : \"cvtpd2dq\t{%1, %0|%0, %1}\";" [(set_attr "type" "ssecvt") (set_attr "prefix_rep" "1") + (set_attr "prefix_data16" "0") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI") (set_attr "amdfam10_decode" "double")]) @@ -2672,7 +2927,6 @@ "* return TARGET_AVX ? 
\"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";" [(set_attr "type" "ssecvt") - (set_attr "prefix_rep" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI") (set_attr "amdfam10_decode" "double")]) @@ -2791,6 +3045,7 @@ [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V2DF") + (set_attr "prefix_data16" "0") (set_attr "amdfam10_decode" "direct")]) (define_expand "vec_unpacks_hi_v4sf" @@ -2883,9 +3138,7 @@ (match_dup 2) (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE2" -{ - operands[2] = gen_reg_rtx (V4SImode); -}) + "operands[2] = gen_reg_rtx (V4SImode);") (define_expand "vec_unpacks_float_lo_v4si" [(set (match_operand:V2DF 0 "register_operand" "") @@ -2895,6 +3148,71 @@ (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE2") +(define_expand "vec_unpacku_float_hi_v4si" + [(set (match_dup 5) + (vec_select:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "") + (parallel [(const_int 2) + (const_int 3) + (const_int 2) + (const_int 3)]))) + (set (match_dup 6) + (float:V2DF + (vec_select:V2SI + (match_dup 5) + (parallel [(const_int 0) (const_int 1)])))) + (set (match_dup 7) + (lt:V2DF (match_dup 6) (match_dup 3))) + (set (match_dup 8) + (and:V2DF (match_dup 7) (match_dup 4))) + (set (match_operand:V2DF 0 "register_operand" "") + (plus:V2DF (match_dup 6) (match_dup 8)))] + "TARGET_SSE2" +{ + REAL_VALUE_TYPE TWO32r; + rtx x; + int i; + + real_ldexp (&TWO32r, &dconst1, 32); + x = const_double_from_real_value (TWO32r, DFmode); + + operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); + operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); + + operands[5] = gen_reg_rtx (V4SImode); + + for (i = 6; i < 9; i++) + operands[i] = gen_reg_rtx (V2DFmode); +}) + +(define_expand "vec_unpacku_float_lo_v4si" + [(set (match_dup 5) + (float:V2DF + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 1)])))) + (set (match_dup 6) + (lt:V2DF 
(match_dup 5) (match_dup 3))) + (set (match_dup 7) + (and:V2DF (match_dup 6) (match_dup 4))) + (set (match_operand:V2DF 0 "register_operand" "") + (plus:V2DF (match_dup 5) (match_dup 7)))] + "TARGET_SSE2" +{ + REAL_VALUE_TYPE TWO32r; + rtx x; + int i; + + real_ldexp (&TWO32r, &dconst1, 32); + x = const_double_from_real_value (TWO32r, DFmode); + + operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); + operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); + + for (i = 5; i < 8; i++) + operands[i] = gen_reg_rtx (V2DFmode); +}) + (define_expand "vec_pack_trunc_v2df" [(match_operand:V4SF 0 "register_operand" "") (match_operand:V2DF 1 "nonimmediate_operand" "") @@ -2925,9 +3243,9 @@ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); - emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -2944,9 +3262,9 @@ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); - emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); DONE; }) @@ -3056,6 +3374,7 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V4SF,V2SF,V2SF")]) +;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
(define_insn "avx_unpckhps256" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_select:V8SF @@ -3072,7 +3391,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "*avx_unpckhps" +(define_insn "*avx_interleave_highv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3086,7 +3405,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) -(define_insn "sse_unpckhps" +(define_insn "vec_interleave_highv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3099,6 +3418,7 @@ [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) +;; Recall that the 256-bit unpck insns only shuffle within their lanes. (define_insn "avx_unpcklps256" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_select:V8SF @@ -3115,7 +3435,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "*avx_unpcklps" +(define_insn "*avx_interleave_lowv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3129,7 +3449,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) -(define_insn "sse_unpcklps" +(define_insn "vec_interleave_lowv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_select:V4SF (vec_concat:V8SF @@ -3261,6 +3581,7 @@ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3302,6 +3623,7 @@ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) @@ -3327,6 +3649,7 @@ return "shufps\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "mode" "V4SF")]) (define_insn "sse_storehps" @@ -3434,6 +3757,7 @@ vmovlps\t{%2, %1, %0|%0, %1, %2} vmovlps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr 
"length_immediate" "1,*,*") (set_attr "prefix" "vex") (set_attr "mode" "V4SF,V2SF,V2SF")]) @@ -3450,6 +3774,7 @@ movlps\t{%2, %0|%0, %2} movlps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "length_immediate" "1,*,*") (set_attr "mode" "V4SF,V2SF,V2SF")]) (define_insn "*avx_movss" @@ -3475,13 +3800,27 @@ [(set_attr "type" "ssemov") (set_attr "mode" "SF")]) -(define_insn "*vec_dupv4sf_avx" - [(set (match_operand:V4SF 0 "register_operand" "=x") +(define_expand "vec_dupv4sf" + [(set (match_operand:V4SF 0 "register_operand" "") (vec_duplicate:V4SF - (match_operand:SF 1 "register_operand" "x")))] - "TARGET_AVX" - "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}" - [(set_attr "type" "sselog1") + (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_SSE" +{ + if (!TARGET_AVX) + operands[1] = force_reg (V4SFmode, operands[1]); +}) + +(define_insn "*vec_dupv4sf_avx" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_duplicate:V4SF + (match_operand:SF 1 "nonimmediate_operand" "x,m")))] + "TARGET_AVX" + "@ + vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} + vbroadcastss\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1,ssemov") + (set_attr "length_immediate" "1,0") + (set_attr "prefix_extra" "0,1") (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) @@ -3492,6 +3831,7 @@ "TARGET_SSE" "shufps\t{$0, %0, %0|%0, %0, 0}" [(set_attr "type" "sselog1") + (set_attr "length_immediate" "1") (set_attr "mode" "V4SF")]) (define_insn "*vec_concatv2sf_avx" @@ -3507,6 +3847,8 @@ punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "length_immediate" "*,1,*,*,*") + (set_attr "prefix_extra" "*,1,*,*,*") (set (attr "prefix") (if_then_else (eq_attr "alternative" "3,4") (const_string "orig") @@ -3528,7 +3870,9 @@ punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_data16" "*,1,*,*,*") (set_attr "prefix_extra" "*,1,*,*,*") + (set_attr 
"length_immediate" "*,1,*,*,*") (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing @@ -3582,35 +3926,78 @@ DONE; }) -(define_insn "*vec_setv4sf_0_avx" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m") - (vec_merge:V4SF - (vec_duplicate:V4SF - (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) - (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0") +(define_insn "*vec_set<mode>_0_avx" + [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m") + (vec_merge:SSEMODE4S + (vec_duplicate:SSEMODE4S + (match_operand:<ssescalarmode> 2 + "general_operand" " x,m,*r,x,*rm,x*rfF")) + (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0") (const_int 1)))] "TARGET_AVX" "@ + vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe} + vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} + vmovd\t{%2, %0|%0, %2} vmovss\t{%2, %1, %0|%0, %1, %2} - vmovss\t{%2, %0|%0, %2} - vmovd\t{%2, %0|%0, %2} + vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} + #" + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*") + (set_attr "prefix_extra" "*,*,*,*,1,*") + (set_attr "length_immediate" "*,*,*,*,1,*") + (set_attr "prefix" "vex") + (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")]) + +(define_insn "*vec_set<mode>_0_sse4_1" + [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m") + (vec_merge:SSEMODE4S + (vec_duplicate:SSEMODE4S + (match_operand:<ssescalarmode> 2 + "general_operand" " x,m,*r,x,*rm,*rfF")) + (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0") + (const_int 1)))] + "TARGET_SSE4_1" + "@ + insertps\t{$0xe, %2, %0|%0, %2, 0xe} + mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} + movd\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} + pinsrd\t{$0, %2, %0|%0, %2, 0} + #" + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*") + (set_attr "prefix_extra" "*,*,*,*,1,*") + (set_attr "length_immediate" "*,*,*,*,1,*") + (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")]) + 
+(define_insn "*vec_set<mode>_0_sse2" + [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m") + (vec_merge:SSEMODE4S + (vec_duplicate:SSEMODE4S + (match_operand:<ssescalarmode> 2 + "general_operand" " m,*r,x,x*rfF")) + (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0") + (const_int 1)))] + "TARGET_SSE2" + "@ + mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} + movd\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2} #" [(set_attr "type" "ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "SF")]) - -(define_insn "vec_setv4sf_0" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m") - (vec_merge:V4SF - (vec_duplicate:V4SF - (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) - (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0") + (set_attr "mode" "<ssescalarmode>,SI,SF,*")]) + +(define_insn "vec_set<mode>_0" + [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m") + (vec_merge:SSEMODE4S + (vec_duplicate:SSEMODE4S + (match_operand:<ssescalarmode> 2 + "general_operand" " m,x,x*rfF")) + (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0") (const_int 1)))] "TARGET_SSE" "@ movss\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} - movd\t{%2, %0|%0, %2} #" [(set_attr "type" "ssemov") (set_attr "mode" "SF")]) @@ -3629,6 +4016,8 @@ return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V4SF")]) @@ -3645,7 +4034,9 @@ return "insertps\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "V4SF")]) (define_insn "*avx_insertps" @@ -3658,6 +4049,8 @@ "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; [(set_attr "type" "sselog") (set_attr "prefix" "vex") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "V4SF")]) 
(define_insn "sse4_1_insertps" @@ -3669,7 +4062,9 @@ "TARGET_SSE4_1" "insertps\t{%3, %2, %0|%0, %2, %3}"; [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "V4SF")]) (define_split @@ -3744,6 +4139,8 @@ "TARGET_AVX" "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3756,6 +4153,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3769,6 +4168,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3782,6 +4183,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3797,6 +4200,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3812,6 +4217,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3831,6 +4238,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr 
"prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3850,6 +4259,8 @@ "TARGET_AVX" "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3862,7 +4273,9 @@ "TARGET_SSE4_1" "%vextractps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) @@ -3899,6 +4312,7 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Recall that the 256-bit unpck insns only shuffle within their lanes. (define_insn "avx_unpckhpd256" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_select:V4DF @@ -3913,8 +4327,8 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_expand "sse2_unpckhpd_exp" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "") +(define_expand "vec_interleave_highv2df" + [(set (match_operand:V2DF 0 "register_operand" "") (vec_select:V2DF (vec_concat:V4DF (match_operand:V2DF 1 "nonimmediate_operand" "") @@ -3922,26 +4336,48 @@ (parallel [(const_int 1) (const_int 3)])))] "TARGET_SSE2" - "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") - -(define_insn "*avx_unpckhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") +{ + if (!ix86_vec_interleave_v2df_operator_ok (operands, 1)) + operands[2] = force_reg (V2DFmode, operands[2]); +}) + +(define_insn "*avx_interleave_highv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x") - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0")) + (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x") + 
(match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0")) (parallel [(const_int 1) (const_int 3)])))] - "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)" "@ vunpckhpd\t{%2, %1, %0|%0, %1, %2} + vmovddup\t{%H1, %0|%0, %H1} vmovlpd\t{%H1, %2, %0|%0, %2, %H1} vmovhpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "V2DF,V1DF,V1DF")]) - -(define_insn "sse2_unpckhpd" + [(set_attr "type" "sselog,sselog,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) + +(define_insn "*sse3_interleave_highv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x") + (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" + "@ + unpckhpd\t{%2, %0|%0, %2} + movddup\t{%H1, %0|%0, %H1} + movlpd\t{%H1, %0|%0, %H1} + movhpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,sselog,ssemov,ssemov") + (set_attr "prefix_data16" "*,*,1,1") + (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) + +(define_insn "*sse2_interleave_highv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (vec_select:V2DF (vec_concat:V4DF @@ -3949,58 +4385,121 @@ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) (parallel [(const_int 1) (const_int 3)])))] - "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" "@ unpckhpd\t{%2, %0|%0, %2} movlpd\t{%H1, %0|%0, %H1} movhpd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "prefix_data16" "*,1,1") (set_attr "mode" "V2DF,V1DF,V1DF")]) -(define_insn "avx_movddup256" - [(set (match_operand:V4DF 0 "register_operand" "=x") +;; Recall that the 256-bit unpck insns only shuffle 
within their lanes. +(define_expand "avx_movddup256" + [(set (match_operand:V4DF 0 "register_operand" "") + (vec_select:V4DF + (vec_concat:V8DF + (match_operand:V4DF 1 "nonimmediate_operand" "") + (match_dup 1)) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_AVX" + "") + +(define_expand "avx_unpcklpd256" + [(set (match_operand:V4DF 0 "register_operand" "") (vec_select:V4DF (vec_concat:V8DF - (match_operand:V4DF 1 "nonimmediate_operand" "xm") - (match_dup 1)) - (parallel [(const_int 0) (const_int 2) - (const_int 4) (const_int 6)])))] - "TARGET_AVX" - "vmovddup\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog1") + (match_operand:V4DF 1 "register_operand" "") + (match_operand:V4DF 2 "nonimmediate_operand" "")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_AVX" + "") + +(define_insn "*avx_unpcklpd256" + [(set (match_operand:V4DF 0 "register_operand" "=x,x") + (vec_select:V4DF + (vec_concat:V8DF + (match_operand:V4DF 1 "nonimmediate_operand" "xm,x") + (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_AVX + && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))" + "@ + vmovddup\t{%1, %0|%0, %1} + vunpcklpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_insn "*avx_movddup" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") +(define_expand "vec_interleave_lowv2df" + [(set (match_operand:V2DF 0 "register_operand" "") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") - (match_dup 1)) + (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2" +{ + if (!ix86_vec_interleave_v2df_operator_ok (operands, 0)) + operands[1] = force_reg (V2DFmode, operands[1]); +}) + +(define_insn 
"*avx_interleave_lowv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x")) (parallel [(const_int 0) (const_int 2)])))] - "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)" "@ + vunpcklpd\t{%2, %1, %0|%0, %1, %2} vmovddup\t{%1, %0|%0, %1} - #" - [(set_attr "type" "sselog1,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "V2DF")]) - -(define_insn "*sse3_movddup" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") + vmovhpd\t{%2, %1, %0|%0, %1, %2} + vmovlpd\t{%2, %H0|%H0, %2}" + [(set_attr "type" "sselog,sselog,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) + +(define_insn "*sse3_interleave_lowv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") - (match_dup 1)) + (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x")) (parallel [(const_int 0) (const_int 2)])))] - "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" "@ + unpcklpd\t{%2, %0|%0, %2} movddup\t{%1, %0|%0, %1} - #" - [(set_attr "type" "sselog1,ssemov") - (set_attr "mode" "V2DF")]) + movhpd\t{%2, %0|%0, %2} + movlpd\t{%2, %H0|%H0, %2}" + [(set_attr "type" "sselog,sselog,ssemov,ssemov") + (set_attr "prefix_data16" "*,*,1,1") + (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) + +(define_insn "*sse2_interleave_lowv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) + (parallel [(const_int 0) + (const_int 
2)])))] + "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" + "@ + unpcklpd\t{%2, %0|%0, %2} + movhpd\t{%2, %0|%0, %2} + movlpd\t{%2, %H0|%H0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "prefix_data16" "*,1,1") + (set_attr "mode" "V2DF,V1DF,V1DF")]) (define_split [(set (match_operand:V2DF 0 "memory_operand" "") @@ -4019,63 +4518,19 @@ DONE; }) -(define_insn "avx_unpcklpd256" - [(set (match_operand:V4DF 0 "register_operand" "=x") - (vec_select:V4DF - (vec_concat:V8DF - (match_operand:V4DF 1 "register_operand" "x") - (match_operand:V4DF 2 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 4) - (const_int 2) (const_int 6)])))] - "TARGET_AVX" - "vunpcklpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix" "vex") - (set_attr "mode" "V4DF")]) - -(define_expand "sse2_unpcklpd_exp" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2" - "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") - -(define_insn "*avx_unpcklpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0") - (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "@ - vunpcklpd\t{%2, %1, %0|%0, %1, %2} - vmovhpd\t{%2, %1, %0|%0, %1, %2} - vmovlpd\t{%2, %H0|%H0, %2}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "V2DF,V1DF,V1DF")]) - -(define_insn "sse2_unpcklpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 
"nonimmediate_operand" " 0,0,0") - (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "@ - unpcklpd\t{%2, %0|%0, %2} - movhpd\t{%2, %0|%0, %2} - movlpd\t{%2, %H0|%H0, %2}" - [(set_attr "type" "sselog,ssemov,ssemov") - (set_attr "mode" "V2DF,V1DF,V1DF")]) + (match_operand:V2DF 1 "memory_operand" "") + (match_dup 1)) + (parallel [(match_operand:SI 2 "const_0_to_1_operand" "") + (match_operand:SI 3 "const_int_operand" "")])))] + "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])" + [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))] +{ + operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); +}) (define_expand "avx_shufpd256" [(match_operand:V4DF 0 "register_operand" "") @@ -4115,6 +4570,7 @@ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) @@ -4133,51 +4589,27 @@ }) (define_expand "vec_extract_even<mode>" - [(set (match_operand:SSEMODE4S 0 "register_operand" "") - (vec_select:SSEMODE4S - (vec_concat:<ssedoublesizemode> - (match_operand:SSEMODE4S 1 "register_operand" "") - (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))] - "TARGET_SSE") + [(match_operand:SSEMODE_EO 0 "register_operand" "") + (match_operand:SSEMODE_EO 1 "register_operand" "") + (match_operand:SSEMODE_EO 2 "register_operand" "")] + "" +{ + ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0); + DONE; +}) (define_expand "vec_extract_odd<mode>" - [(set (match_operand:SSEMODE4S 0 "register_operand" "") - (vec_select:SSEMODE4S - (vec_concat:<ssedoublesizemode> - (match_operand:SSEMODE4S 1 "register_operand" "") - (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3) - (const_int 
5) - (const_int 7)])))] - "TARGET_SSE") - -(define_expand "vec_extract_even<mode>" - [(set (match_operand:SSEMODE2D 0 "register_operand" "") - (vec_select:SSEMODE2D - (vec_concat:<ssedoublesizemode> - (match_operand:SSEMODE2D 1 "register_operand" "") - (match_operand:SSEMODE2D 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2") - -(define_expand "vec_extract_odd<mode>" - [(set (match_operand:SSEMODE2D 0 "register_operand" "") - (vec_select:SSEMODE2D - (vec_concat:<ssedoublesizemode> - (match_operand:SSEMODE2D 1 "register_operand" "") - (match_operand:SSEMODE2D 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2") + [(match_operand:SSEMODE_EO 0 "register_operand" "") + (match_operand:SSEMODE_EO 1 "register_operand" "") + (match_operand:SSEMODE_EO 2 "register_operand" "")] + "" +{ + ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1); + DONE; +}) ;; punpcklqdq and punpckhqdq are shorter than shufpd. 
-(define_insn "*avx_punpckhqdq" +(define_insn "*avx_interleave_highv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4191,7 +4623,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhqdq" +(define_insn "vec_interleave_highv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4205,7 +4637,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklqdq" +(define_insn "*avx_interleave_lowv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4219,7 +4651,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklqdq" +(define_insn "vec_interleave_lowv2di" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_select:V2DI (vec_concat:V4DI @@ -4251,6 +4683,7 @@ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V2DF")]) @@ -4272,6 +4705,7 @@ return "shufpd\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") (set_attr "mode" "V2DF")]) ;; Avoid combining registers from different units in a single alternative, @@ -4305,6 +4739,7 @@ # #" [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov") + (set_attr "prefix_data16" "1,*,*,*,*") (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) (define_split @@ -4333,6 +4768,7 @@ # #" [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") + (set_attr "prefix_data16" "1,*,*,*,*") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V1DF,DF,DF,DF,DF")]) @@ -4399,6 +4835,8 @@ # #" [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "prefix_data16" "1,*,*,*,*,*") + (set_attr "length_immediate" "*,*,1,*,*,*") (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) (define_split @@ -4462,6 +4900,8 @@ # #" [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov") + (set_attr 
"prefix_data16" "*,1,*,*,1,*,*,*") + (set_attr "length_immediate" "*,*,*,1,*,*,*,*") (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")]) (define_split @@ -4537,6 +4977,8 @@ movhps\t{%H1, %0|%0, %H1} movhps\t{%1, %H0|%H0, %1}" [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "prefix_data16" "*,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,1,*,*") (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) (define_insn "*vec_dupv2df_sse3" @@ -4596,6 +5038,7 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") + (set_attr "prefix_data16" "*,1,*,*,*") (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -4677,56 +5120,25 @@ (mult:V16QI (match_operand:V16QI 1 "register_operand" "") (match_operand:V16QI 2 "register_operand" "")))] "TARGET_SSE2 - && !(reload_completed || reload_in_progress)" + && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] { - rtx t[12], op0, op[3]; + rtx t[6]; int i; - if (TARGET_SSE5) - { - /* On SSE5, we can take advantage of the pperm instruction to pack and - unpack the bytes. Unpack data such that we've got a source byte in - each low byte of each word. We don't care what goes into the high - byte, so put 0 there. */ - for (i = 0; i < 6; ++i) - t[i] = gen_reg_rtx (V8HImode); - - for (i = 0; i < 2; i++) - { - op[0] = t[i]; - op[1] = operands[i+1]; - ix86_expand_sse5_unpack (op, true, true); /* high bytes */ - - op[0] = t[i+2]; - ix86_expand_sse5_unpack (op, true, false); /* low bytes */ - } - - /* Multiply words. 
*/ - emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */ - emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */ - - /* Pack the low byte of each word back into a single xmm */ - op[0] = operands[0]; - op[1] = t[5]; - op[2] = t[4]; - ix86_expand_sse5_pack (op); - DONE; - } - - for (i = 0; i < 12; ++i) + for (i = 0; i < 6; ++i) t[i] = gen_reg_rtx (V16QImode); /* Unpack data such that we've got a source byte in each low byte of each word. We don't care what goes into the high byte of each word. Rather than trying to get zero in there, most convenient is to let it be a copy of the low byte. */ - emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); - emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); - emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); - emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); + emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1])); + emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2])); + emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1])); + emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2])); /* Multiply words. The end-of-line annotations here give a picture of what the output of that instruction looks like. Dot means don't care; the @@ -4738,16 +5150,8 @@ gen_lowpart (V8HImode, t[2]), gen_lowpart (V8HImode, t[3]))); - /* Extract the relevant bytes and merge them back together. 
*/ - emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ - emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */ - emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */ - emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */ - emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */ - emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */ - - op0 = operands[0]; - emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */ + /* Extract the even bytes and merge them back together. */ + ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0); DONE; }) @@ -4944,6 +5348,7 @@ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" "vpmuldq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -5069,6 +5474,7 @@ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" "pmaddwd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -5078,7 +5484,7 @@ (match_operand:V4SI 2 "register_operand" "")))] "TARGET_SSE2" { - if (TARGET_SSE4_1 || TARGET_SSE5) + if (TARGET_SSE4_1 || TARGET_AVX) ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); }) @@ -5089,6 +5495,7 @@ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" "vpmulld\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -5102,37 +5509,12 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a -;; multiply/add. In general, we expect the define_split to occur before -;; register allocation, so we have to handle the corner case where the target -;; is the same as one of the inputs. 
-(define_insn_and_split "*sse5_mulv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=&x") - (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE5" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V4SI (mult:V4SI (match_dup 1) - (match_dup 2)) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V4SImode); -} - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - (define_insn_and_split "*sse2_mulv4si3" [(set (match_operand:V4SI 0 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5 - && !(reload_completed || reload_in_progress)" + "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX + && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] @@ -5158,12 +5540,12 @@ /* Shift both input vectors down one element, so that elements 3 and 1 are now in the slots for elements 2 and 0. For K8, at least, this is faster than using a shuffle. */ - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), - gen_lowpart (TImode, op1), - thirtytwo)); - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), - gen_lowpart (TImode, op2), - thirtytwo)); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2), + gen_lowpart (V1TImode, op1), + thirtytwo)); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3), + gen_lowpart (V1TImode, op2), + thirtytwo)); /* Multiply elements 3 and 1. */ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3)); @@ -5176,7 +5558,7 @@ const0_rtx, const0_rtx)); /* Merge the parts back together. 
*/ - emit_insn (gen_sse2_punpckldq (op0, t5, t6)); + emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6)); DONE; }) @@ -5185,7 +5567,7 @@ (mult:V2DI (match_operand:V2DI 1 "register_operand" "") (match_operand:V2DI 2 "register_operand" "")))] "TARGET_SSE2 - && !(reload_completed || reload_in_progress)" + && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] @@ -5193,17 +5575,20 @@ rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx op0, op1, op2; - if (TARGET_SSE5) + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + + if (TARGET_XOP) { /* op1: A,B,C,D, op2: E,F,G,H */ - op0 = operands[0]; - op1 = gen_lowpart (V4SImode, operands[1]); - op2 = gen_lowpart (V4SImode, operands[2]); + op1 = gen_lowpart (V4SImode, op1); + op2 = gen_lowpart (V4SImode, op2); + t1 = gen_reg_rtx (V4SImode); t2 = gen_reg_rtx (V4SImode); - t3 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V2DImode); t4 = gen_reg_rtx (V2DImode); - t5 = gen_reg_rtx (V2DImode); /* t1: B,A,D,C */ emit_insn (gen_sse2_pshufd_1 (t1, op1, @@ -5212,55 +5597,50 @@ GEN_INT (3), GEN_INT (2))); - /* t2: 0 */ - emit_move_insn (t2, CONST0_RTX (V4SImode)); - - /* t3: (B*E),(A*F),(D*G),(C*H) */ - emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2)); + /* t2: (B*E),(A*F),(D*G),(C*H) */ + emit_insn (gen_mulv4si3 (t2, t1, op2)); /* t4: (B*E)+(A*F), (D*G)+(C*H) */ - emit_insn (gen_sse5_phadddq (t4, t3)); + emit_insn (gen_xop_phadddq (t3, t2)); /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ - emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); + emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ - emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5)); - DONE; + emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4)); } - - op0 = operands[0]; - op1 = operands[1]; - op2 = operands[2]; - t1 = gen_reg_rtx (V2DImode); - t2 = gen_reg_rtx (V2DImode); - t3 = gen_reg_rtx (V2DImode); - t4 = gen_reg_rtx (V2DImode); - t5 = gen_reg_rtx (V2DImode); - t6 = gen_reg_rtx (V2DImode); - thirtytwo = GEN_INT 
(32); - - /* Multiply low parts. */ - emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), - gen_lowpart (V4SImode, op2))); - - /* Shift input vectors left 32 bits so we can multiply high parts. */ - emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); - emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); - - /* Multiply high parts by low parts. */ - emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), - gen_lowpart (V4SImode, t3))); - emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), - gen_lowpart (V4SImode, t2))); - - /* Shift them back. */ - emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); - emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); - - /* Add the three parts together. */ - emit_insn (gen_addv2di3 (t6, t1, t4)); - emit_insn (gen_addv2di3 (op0, t6, t5)); + else + { + t1 = gen_reg_rtx (V2DImode); + t2 = gen_reg_rtx (V2DImode); + t3 = gen_reg_rtx (V2DImode); + t4 = gen_reg_rtx (V2DImode); + t5 = gen_reg_rtx (V2DImode); + t6 = gen_reg_rtx (V2DImode); + thirtytwo = GEN_INT (32); + + /* Multiply low parts. */ + emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, op2))); + + /* Shift input vectors left 32 bits so we can multiply high parts. */ + emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); + emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); + + /* Multiply high parts by low parts. */ + emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, t3))); + emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), + gen_lowpart (V4SImode, t2))); + + /* Shift them back. */ + emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); + emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); + + /* Add the three parts together. 
*/ + emit_insn (gen_addv2di3 (t6, t1, t4)); + emit_insn (gen_addv2di3 (op0, t6, t5)); + } DONE; }) @@ -5348,7 +5728,7 @@ [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtx t1, t2; @@ -5365,7 +5745,7 @@ GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2)); + emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); DONE; }) @@ -5373,7 +5753,7 @@ [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtx t1, t2; @@ -5390,8 +5770,7 @@ GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2)); - DONE; + emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); DONE; }) @@ -5461,12 +5840,12 @@ t2 = gen_reg_rtx (V4SImode); t3 = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), - gen_lowpart (TImode, operands[1]), - GEN_INT (32))); - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), - gen_lowpart (TImode, operands[2]), - GEN_INT (32))); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2), + gen_lowpart (V1TImode, operands[1]), + GEN_INT (32))); + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3), + gen_lowpart (V1TImode, operands[2]), + GEN_INT (32))); t4 = gen_reg_rtx (V2DImode); emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); @@ -5484,6 +5863,10 @@ "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix" "vex") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) (set_attr "mode" "TI")]) (define_insn "ashr<mode>3" @@ -5495,6 +5878,25 @@ "psra<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "1") + (set (attr "length_immediate") + 
(if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "TI")]) + +(define_insn "*avx_lshrv1ti3" + [(set (match_operand:V1TI 0 "register_operand" "=x") + (lshiftrt:V1TI + (match_operand:V1TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*avx_lshr<mode>3" @@ -5506,6 +5908,25 @@ "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix" "vex") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "TI")]) + +(define_insn "sse2_lshrv1ti3" + [(set (match_operand:V1TI 0 "register_operand" "=x") + (lshiftrt:V1TI + (match_operand:V1TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "lshr<mode>3" @@ -5517,6 +5938,24 @@ "psrl<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "1") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "TI")]) + +(define_insn "*avx_ashlv1ti3" + [(set (match_operand:V1TI 0 "register_operand" "=x") + (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr 
"type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*avx_ashl<mode>3" @@ -5528,6 +5967,24 @@ "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix" "vex") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "TI")]) + +(define_insn "sse2_ashlv1ti3" + [(set (match_operand:V1TI 0 "register_operand" "=x") + (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "ashl<mode>3" @@ -5539,26 +5996,32 @@ "psll<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "1") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "1") + (const_string "0"))) (set_attr "mode" "TI")]) (define_expand "vec_shl_<mode>" [(set (match_operand:SSEMODEI 0 "register_operand" "") - (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "") - (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] - "TARGET_SSE2" -{ - operands[0] = gen_lowpart (TImode, operands[0]); - operands[1] = gen_lowpart (TImode, operands[1]); + (ashift:V1TI + (match_operand:SSEMODEI 1 "register_operand" "") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] + "TARGET_SSE2" +{ + operands[0] = gen_lowpart (V1TImode, operands[0]); + operands[1] = gen_lowpart (V1TImode, operands[1]); }) (define_expand "vec_shr_<mode>" [(set (match_operand:SSEMODEI 0 "register_operand" "") - (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "") - (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] - "TARGET_SSE2" -{ - 
operands[0] = gen_lowpart (TImode, operands[0]); - operands[1] = gen_lowpart (TImode, operands[1]); + (lshiftrt:V1TI + (match_operand:SSEMODEI 1 "register_operand" "") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] + "TARGET_SSE2" +{ + operands[0] = gen_lowpart (V1TImode, operands[0]); + operands[1] = gen_lowpart (V1TImode, operands[1]); }) (define_insn "*avx_<code><mode>3" @@ -5569,6 +6032,12 @@ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set (attr "prefix_extra") + (if_then_else + (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)") + (const_int 0)) + (const_string "1") + (const_string "0"))) (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -5762,7 +6231,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE5" + "TARGET_SSE2 && !TARGET_XOP " "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*avx_eq<mode>3" @@ -5773,6 +6242,10 @@ "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecmp") + (set (attr "prefix_extra") + (if_then_else (match_operand:V2DI 0 "" "") + (const_string "1") + (const_string "*"))) (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -5781,7 +6254,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && !TARGET_SSE5 + "TARGET_SSE2 && !TARGET_XOP && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") @@ -5815,6 +6288,10 @@ "TARGET_AVX" "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecmp") + (set (attr "prefix_extra") + (if_then_else (match_operand:V2DI 0 "" "") + 
(const_string "1") + (const_string "*"))) (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -5823,7 +6300,7 @@ (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && !TARGET_SSE5" + "TARGET_SSE2 && !TARGET_XOP" "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "1") @@ -5837,6 +6314,7 @@ "TARGET_SSE4_2" "pcmpgtq\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") + (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) (define_expand "vcond<mode>" @@ -5946,7 +6424,7 @@ (define_expand "<code><mode>3" [(set (match_operand:SSEMODEI 0 "register_operand" "") - (plogic:SSEMODEI + (any_logic:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] "TARGET_SSE" @@ -5954,53 +6432,53 @@ (define_insn "*avx_<code><mode>3" [(set (match_operand:AVX256MODEI 0 "register_operand" "=x") - (plogic:AVX256MODEI + (any_logic:AVX256MODEI (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x") (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))] "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}" + "v<logicprefix>ps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecpsmode>")]) (define_insn "*sse_<code><mode>3" [(set (match_operand:SSEMODEI 0 "register_operand" "=x") - (plogic:SSEMODEI + (any_logic:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] "(TARGET_SSE && !TARGET_SSE2) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "<plogicprefix>ps\t{%2, %0|%0, %2}" + "<logicprefix>ps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) (define_insn "*avx_<code><mode>3" [(set (match_operand:SSEMODEI 0 "register_operand" "=x") - (plogic:SSEMODEI + (any_logic:SSEMODEI 
(match_operand:SSEMODEI 1 "nonimmediate_operand" "%x") (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}" + "vp<logicprefix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) (define_insn "*sse2_<code><mode>3" [(set (match_operand:SSEMODEI 0 "register_operand" "=x") - (plogic:SSEMODEI + (any_logic:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "p<plogicprefix>\t{%2, %0|%0, %2}" + "p<logicprefix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) (define_expand "<code>tf3" [(set (match_operand:TF 0 "register_operand" "") - (plogic:TF + (any_logic:TF (match_operand:TF 1 "nonimmediate_operand" "") (match_operand:TF 2 "nonimmediate_operand" "")))] "TARGET_SSE2" @@ -6008,11 +6486,11 @@ (define_insn "*<code>tf3" [(set (match_operand:TF 0 "register_operand" "=x") - (plogic:TF + (any_logic:TF (match_operand:TF 1 "nonimmediate_operand" "%0") (match_operand:TF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" - "p<plogicprefix>\t{%2, %0|%0, %2}" + "p<logicprefix>\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -6023,284 +6501,41 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Reduce: -;; op1 = abcdefghijklmnop -;; op2 = qrstuvwxyz012345 -;; h1 = aqbrcsdteufvgwhx -;; l1 = iyjzk0l1m2n3o4p5 -;; h2 = aiqybjrzcks0dlt1 -;; l2 = emu2fnv3gow4hpx5 -;; h3 = aeimquy2bfjnrvz3 -;; l3 = cgkosw04dhlptx15 -;; result = bdfhjlnprtvxz135 (define_expand "vec_pack_trunc_v8hi" [(match_operand:V16QI 0 "register_operand" "") (match_operand:V8HI 1 "register_operand" "") (match_operand:V8HI 
2 "register_operand" "")] "TARGET_SSE2" { - rtx op1, op2, h1, l1, h2, l2, h3, l3; - - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - - op1 = gen_lowpart (V16QImode, operands[1]); - op2 = gen_lowpart (V16QImode, operands[2]); - h1 = gen_reg_rtx (V16QImode); - l1 = gen_reg_rtx (V16QImode); - h2 = gen_reg_rtx (V16QImode); - l2 = gen_reg_rtx (V16QImode); - h3 = gen_reg_rtx (V16QImode); - l3 = gen_reg_rtx (V16QImode); - - emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2)); - emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2)); - emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1)); - emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1)); - emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2)); - emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2)); - emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3)); - DONE; -}) - -;; Reduce: -;; op1 = abcdefgh -;; op2 = ijklmnop -;; h1 = aibjckdl -;; l1 = emfngohp -;; h2 = aeimbfjn -;; l2 = cgkodhlp -;; result = bdfhjlnp + rtx op1 = gen_lowpart (V16QImode, operands[1]); + rtx op2 = gen_lowpart (V16QImode, operands[2]); + ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); + DONE; +}) + (define_expand "vec_pack_trunc_v4si" [(match_operand:V8HI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] "TARGET_SSE2" { - rtx op1, op2, h1, l1, h2, l2; - - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - - op1 = gen_lowpart (V8HImode, operands[1]); - op2 = gen_lowpart (V8HImode, operands[2]); - h1 = gen_reg_rtx (V8HImode); - l1 = gen_reg_rtx (V8HImode); - h2 = gen_reg_rtx (V8HImode); - l2 = gen_reg_rtx (V8HImode); - - emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2)); - emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2)); - emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1)); - emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1)); - emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2)); - DONE; 
-}) - -;; Reduce: -;; op1 = abcd -;; op2 = efgh -;; h1 = aebf -;; l1 = cgdh -;; result = bdfh + rtx op1 = gen_lowpart (V8HImode, operands[1]); + rtx op2 = gen_lowpart (V8HImode, operands[2]); + ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); + DONE; +}) + (define_expand "vec_pack_trunc_v2di" [(match_operand:V4SI 0 "register_operand" "") (match_operand:V2DI 1 "register_operand" "") (match_operand:V2DI 2 "register_operand" "")] "TARGET_SSE2" { - rtx op1, op2, h1, l1; - - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - - op1 = gen_lowpart (V4SImode, operands[1]); - op2 = gen_lowpart (V4SImode, operands[2]); - h1 = gen_reg_rtx (V4SImode); - l1 = gen_reg_rtx (V4SImode); - - emit_insn (gen_vec_interleave_highv4si (h1, op1, op2)); - emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2)); - emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1)); - DONE; -}) - -(define_expand "vec_interleave_highv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (vec_select:V16QI - (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "nonimmediate_operand" "")) - (parallel [(const_int 8) (const_int 24) - (const_int 9) (const_int 25) - (const_int 10) (const_int 26) - (const_int 11) (const_int 27) - (const_int 12) (const_int 28) - (const_int 13) (const_int 29) - (const_int 14) (const_int 30) - (const_int 15) (const_int 31)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (vec_select:V16QI - (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 16) - (const_int 1) (const_int 17) - (const_int 2) (const_int 18) - (const_int 3) (const_int 19) - (const_int 4) (const_int 20) - (const_int 5) (const_int 21) - (const_int 6) (const_int 22) - (const_int 
7) (const_int 23)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (parallel [(const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv8hi" - [(set (match_operand:V8HI 0 "register_operand" "") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (vec_select:V4SI - (vec_concat:V8SI - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv2di" - [(set (match_operand:V2DI 0 
"register_operand" "") - (vec_select:V2DI - (vec_concat:V4DI - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_lowv2di" - [(set (match_operand:V2DI 0 "register_operand" "") - (vec_select:V2DI - (vec_concat:V4DI - (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2" -{ - emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "vec_interleave_highv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")) - (parallel [(const_int 2) (const_int 6) - (const_int 3) (const_int 7)])))] - "TARGET_SSE") - -(define_expand "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5)])))] - "TARGET_SSE") - -(define_expand "vec_interleave_highv2df" - [(set (match_operand:V2DF 0 "register_operand" "") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")) - (parallel [(const_int 1) - (const_int 3)])))] - "TARGET_SSE2") - -(define_expand "vec_interleave_lowv2df" - [(set (match_operand:V2DF 0 "register_operand" "") - (vec_select:V2DF - (vec_concat:V4DF - (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")) - (parallel [(const_int 0) - (const_int 2)])))] - "TARGET_SSE2") + rtx op1 = gen_lowpart (V4SImode, operands[1]); + 
rtx op2 = gen_lowpart (V4SImode, operands[2]); + ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); + DONE; +}) (define_insn "*avx_packsswb" [(set (match_operand:V16QI 0 "register_operand" "=x") @@ -6380,7 +6615,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhbw" +(define_insn "*avx_interleave_highv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6400,7 +6635,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhbw" +(define_insn "vec_interleave_highv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6420,7 +6655,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklbw" +(define_insn "*avx_interleave_lowv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6440,7 +6675,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklbw" +(define_insn "vec_interleave_lowv16qi" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_select:V16QI (vec_concat:V32QI @@ -6460,7 +6695,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhwd" +(define_insn "*avx_interleave_highv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6476,7 +6711,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhwd" +(define_insn "vec_interleave_highv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6492,7 +6727,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpcklwd" +(define_insn "*avx_interleave_lowv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6508,7 +6743,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpcklwd" +(define_insn 
"vec_interleave_lowv8hi" [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_select:V8HI (vec_concat:V16HI @@ -6524,7 +6759,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckhdq" +(define_insn "*avx_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -6538,7 +6773,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckhdq" +(define_insn "vec_interleave_highv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -6552,7 +6787,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_punpckldq" +(define_insn "*avx_interleave_lowv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -6566,7 +6801,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "sse2_punpckldq" +(define_insn "vec_interleave_lowv4si" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_select:V4SI (vec_concat:V8SI @@ -6580,7 +6815,7 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_insn "*avx_pinsr<avxmodesuffixs>" +(define_insn "*avx_pinsr<ssevecsize>" [(set (match_operand:SSEMODE124 0 "register_operand" "=x") (vec_merge:SSEMODE124 (vec_duplicate:SSEMODE124 @@ -6591,11 +6826,16 @@ { operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); if (MEM_P (operands[2])) - return "vpinsr<avxmodesuffixs>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; else - return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; + return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; } [(set_attr "type" "sselog") + (set (attr "prefix_extra") + (if_then_else (match_operand:V8HI 0 "register_operand" "") + (const_string "0") + (const_string "1"))) + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -6616,6 +6856,7 @@ } [(set_attr "type" "sselog") 
(set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*sse2_pinsrw" @@ -6635,6 +6876,7 @@ } [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) ;; It must come before sse2_loadld since it is preferred. @@ -6652,6 +6894,7 @@ } [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*avx_pinsrq" @@ -6667,6 +6910,8 @@ return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -6683,7 +6928,9 @@ return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") + (set_attr "prefix_rex" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*sse4_1_pextrb" @@ -6696,6 +6943,7 @@ "%vpextrb\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -6708,6 +6956,7 @@ "%vpextrb\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -6721,6 +6970,7 @@ "%vpextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -6733,6 +6983,7 @@ "%vpextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -6745,6 +6996,7 @@ "%vpextrd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" 
"maybe_vex") (set_attr "mode" "TI")]) @@ -6757,7 +7009,9 @@ "TARGET_SSE4_1 && TARGET_64BIT" "%vpextrq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") + (set_attr "prefix_rex" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -6797,7 +7051,8 @@ } [(set_attr "type" "sselog1") (set_attr "prefix_data16" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_vex") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "sse2_pshuflw" @@ -6839,8 +7094,10 @@ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; } [(set_attr "type" "sselog") + (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "1") (set_attr "prefix" "maybe_vex") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "sse2_pshufhw" @@ -6883,7 +7140,9 @@ } [(set_attr "type" "sselog") (set_attr "prefix_rep" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix_data16" "0") + (set_attr "prefix" "maybe_vex") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "sse2_loadd" @@ -7019,6 +7278,7 @@ vmovq\t{%H1, %0|%0, %H1} vmov{q}\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov,sseishft,ssemov,imov") + (set_attr "length_immediate" "*,1,*,*") (set_attr "memory" "*,none,*,*") (set_attr "prefix" "vex") (set_attr "mode" "V2SF,TI,TI,DI")]) @@ -7035,6 +7295,8 @@ movq\t{%H1, %0|%0, %H1} mov{q}\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov,sseishft,ssemov,imov") + (set_attr "length_immediate" "*,1,*,*") + (set_attr "atom_unit" "*,sishuf,*,*") (set_attr "memory" "*,none,*,*") (set_attr "mode" "V2SF,TI,TI,DI")]) @@ -7051,6 +7313,7 @@ vpsrldq\t{$8, %1, %0|%0, %1, 8} vmovq\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov,sseishft,ssemov") + (set_attr "length_immediate" "*,1,*") (set_attr "memory" "*,none,*") (set_attr "prefix" "vex") (set_attr "mode" "V2SF,TI,TI")]) @@ -7067,6 +7330,8 @@ psrldq\t{$8, %0|%0, 8} movq\t{%H1, %0|%0, %H1}" 
[(set_attr "type" "ssemov,sseishft,ssemov") + (set_attr "length_immediate" "*,1,*") + (set_attr "atom_unit" "*,sishuf,*") (set_attr "memory" "*,none,*") (set_attr "mode" "V2SF,TI,TI")]) @@ -7085,6 +7350,20 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V2SF,V4SF,V2SF")]) +(define_insn "*vec_dupv4si_avx" + [(set (match_operand:V4SI 0 "register_operand" "=x,x") + (vec_duplicate:V4SI + (match_operand:SI 1 "register_operand" "x,m")))] + "TARGET_AVX" + "@ + vpshufd\t{$0, %1, %0|%0, %1, 0} + vbroadcastss\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1,ssemov") + (set_attr "length_immediate" "1,0") + (set_attr "prefix_extra" "0,1") + (set_attr "prefix" "vex") + (set_attr "mode" "TI,V4SF")]) + (define_insn "*vec_dupv4si" [(set (match_operand:V4SI 0 "register_operand" "=Y2,x") (vec_duplicate:V4SI @@ -7094,18 +7373,31 @@ %vpshufd\t{$0, %1, %0|%0, %1, 0} shufps\t{$0, %0, %0|%0, %0, 0}" [(set_attr "type" "sselog1") - (set_attr "prefix" "maybe_vex,orig") + (set_attr "length_immediate" "1") (set_attr "mode" "TI,V4SF")]) (define_insn "*vec_dupv2di_avx" - [(set (match_operand:V2DI 0 "register_operand" "=x") + [(set (match_operand:V2DI 0 "register_operand" "=x,x") (vec_duplicate:V2DI - (match_operand:DI 1 "register_operand" "x")))] - "TARGET_AVX" - "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}" + (match_operand:DI 1 "nonimmediate_operand" " x,m")))] + "TARGET_AVX" + "@ + vpunpcklqdq\t{%1, %1, %0|%0, %1, %1} + vmovddup\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) + (set_attr "mode" "TI,DF")]) + +(define_insn "*vec_dupv2di_sse3" + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" " 0,m")))] + "TARGET_SSE3" + "@ + punpcklqdq\t%0, %0 + movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "mode" "TI,DF")]) (define_insn "*vec_dupv2di" [(set (match_operand:V2DI 0 "register_operand" "=Y2,x") @@ -7131,6 +7423,8 @@ punpckldq\t{%2, %0|%0, %2} movd\t{%1, 
%0|%0, %1}" [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_extra" "1,*,*,*,*") + (set_attr "length_immediate" "1,*,*,*,*") (set (attr "prefix") (if_then_else (eq_attr "alternative" "3,4") (const_string "orig") @@ -7151,6 +7445,7 @@ movd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") (set_attr "prefix_extra" "1,*,*,*,*") + (set_attr "length_immediate" "1,*,*,*,*") (set_attr "mode" "TI,TI,TI,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing @@ -7257,6 +7552,8 @@ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} vmovhps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov") + (set_attr "prefix_extra" "1,*,*,*,*,*") + (set_attr "length_immediate" "1,*,*,*,*,*") (set (attr "prefix") (if_then_else (eq_attr "alternative" "3") (const_string "orig") @@ -7278,7 +7575,9 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "prefix_rex" "1,*,1,*,*,*,*") (set_attr "prefix_extra" "1,*,*,*,*,*,*") + (set_attr "length_immediate" "1,*,*,*,*,*,*") (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")]) (define_insn "*vec_concatv2di_rex64_sse" @@ -7295,6 +7594,7 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2}" [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "prefix_rex" "*,1,*,*,*,*") (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) (define_expand "vec_unpacku_hi_v16qi" @@ -7304,8 +7604,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7318,8 +7616,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7332,8 +7628,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack 
(operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7346,8 +7640,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -7360,8 +7652,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7374,8 +7664,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7388,8 +7676,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7402,8 +7688,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -7416,8 +7700,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7430,8 +7712,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7444,8 +7724,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7458,8 +7736,6 @@ { if 
(TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -7624,6 +7900,7 @@ "TARGET_SSE2" "psadbw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -7645,7 +7922,7 @@ UNSPEC_MOVMSK))] "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) @@ -7655,7 +7932,7 @@ UNSPEC_MOVMSK))] "TARGET_SSE2" "%vpmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) @@ -7678,8 +7955,10 @@ "TARGET_SSE2 && !TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") + ;; The implicit %rdi operand confuses default length_vex computation. + (set_attr "length_vex" "3") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -7692,8 +7971,11 @@ "TARGET_SSE2 && TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") + ;; The implicit %rdi operand confuses default length_vex computation. + (set (attr "length_vex") + (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 
3 + 1 : 2 + 1"))) (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -7703,6 +7985,7 @@ "TARGET_SSE" "%vldmxcsr\t%0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "mxcsr") (set_attr "prefix" "maybe_vex") (set_attr "memory" "load")]) @@ -7712,6 +7995,7 @@ "TARGET_SSE" "%vstmxcsr\t%0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "mxcsr") (set_attr "prefix" "maybe_vex") (set_attr "memory" "store")]) @@ -7730,6 +8014,8 @@ "TARGET_SSE || TARGET_3DNOW_A" "sfence" [(set_attr "type" "sse") + (set_attr "length_address" "0") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_insn "sse2_clflush" @@ -7738,6 +8024,7 @@ "TARGET_SSE2" "clflush\t%a0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_expand "sse2_mfence" @@ -7755,6 +8042,8 @@ "TARGET_64BIT || TARGET_SSE2" "mfence" [(set_attr "type" "sse") + (set_attr "length_address" "0") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_expand "sse2_lfence" @@ -7772,6 +8061,8 @@ "TARGET_SSE2" "lfence" [(set_attr "type" "sse") + (set_attr "length_address" "0") + (set_attr "atom_sse_attr" "lfence") (set_attr "memory" "unknown")]) (define_insn "sse3_mwait" @@ -7852,6 +8143,7 @@ "TARGET_AVX" "vphaddw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -7895,6 +8187,7 @@ "TARGET_SSSE3" "phaddw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -7923,7 +8216,9 @@ "TARGET_SSSE3" "phaddw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_phadddv4si3" @@ -7950,6 +8245,7 @@ "TARGET_AVX" 
"vphaddd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -7977,6 +8273,7 @@ "TARGET_SSSE3" "phaddd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -7997,7 +8294,9 @@ "TARGET_SSSE3" "phaddd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_phaddswv8hi3" @@ -8040,6 +8339,7 @@ "TARGET_AVX" "vphaddsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8083,6 +8383,7 @@ "TARGET_SSSE3" "phaddsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8111,7 +8412,9 @@ "TARGET_SSSE3" "phaddsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_phsubwv8hi3" @@ -8154,6 +8457,7 @@ "TARGET_AVX" "vphsubw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8197,6 +8501,7 @@ "TARGET_SSSE3" "phsubw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8225,7 +8530,9 @@ "TARGET_SSSE3" "phsubw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") 
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_phsubdv4si3" @@ -8252,6 +8559,7 @@ "TARGET_AVX" "vphsubd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8279,6 +8587,7 @@ "TARGET_SSSE3" "phsubd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8299,7 +8608,9 @@ "TARGET_SSSE3" "phsubd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_phsubswv8hi3" @@ -8342,6 +8653,7 @@ "TARGET_AVX" "vphsubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8385,6 +8697,7 @@ "TARGET_SSSE3" "phsubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8413,7 +8726,9 @@ "TARGET_SSSE3" "phsubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_pmaddubsw128" @@ -8466,6 +8781,7 @@ "TARGET_AVX" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8519,6 +8835,7 @@ "TARGET_SSSE3" "pmaddubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8557,7 
+8874,9 @@ "TARGET_SSSE3" "pmaddubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "simul") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_expand "ssse3_pmulhrswv8hi3" @@ -8600,6 +8919,7 @@ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8664,6 +8984,7 @@ "pmulhrsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseimul") (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_pshufbv16qi3" @@ -8674,6 +8995,7 @@ "TARGET_AVX" "vpshufb\t{%2, %1, %0|%0, %1, %2}"; [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8698,6 +9020,7 @@ "pshufb\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_psign<mode>3" @@ -8709,6 +9032,7 @@ "TARGET_AVX" "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"; [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8735,6 +9059,7 @@ "psign<mmxvecsize>\t{%2, %0|%0, %2}"; [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "*avx_palignrti" @@ -8749,6 +9074,8 @@ return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sseishft") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -8764,8 +9091,10 @@ return "palignr\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" 
"sseishft") + (set_attr "atom_unit" "sishuf") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "ssse3_palignrdi" @@ -8780,7 +9109,10 @@ return "palignr\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sseishft") - (set_attr "prefix_extra" "1") + (set_attr "atom_unit" "sishuf") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) (define_insn "abs<mode>2" @@ -8800,7 +9132,9 @@ "TARGET_SSSE3" "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -8841,6 +9175,7 @@ "extrq\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse") (set_attr "prefix_data16" "1") + (set_attr "length_immediate" "2") (set_attr "mode" "TI")]) (define_insn "sse4a_extrq" @@ -8864,7 +9199,9 @@ "TARGET_SSE4A" "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" [(set_attr "type" "sseins") + (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "1") + (set_attr "length_immediate" "2") (set_attr "mode" "TI")]) (define_insn "sse4a_insertq" @@ -8875,6 +9212,7 @@ "TARGET_SSE4A" "insertq\t{%2, %0|%0, %2}" [(set_attr "type" "sseins") + (set_attr "prefix_data16" "0") (set_attr "prefix_rep" "1") (set_attr "mode" "TI")]) @@ -8893,6 +9231,8 @@ "TARGET_AVX" "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")]) @@ -8906,6 +9246,8 @@ "TARGET_AVX" "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr 
"length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")]) @@ -8918,7 +9260,9 @@ "TARGET_SSE4_1" "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "<MODE>")]) (define_insn "sse4_1_blendvp<ssemodesuffixf2c>" @@ -8931,6 +9275,7 @@ "TARGET_SSE4_1" "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "<MODE>")]) @@ -8945,6 +9290,8 @@ "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemul") (set_attr "prefix" "vex") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "<avxvecmode>")]) (define_insn "sse4_1_dpp<ssemodesuffixf2c>" @@ -8957,7 +9304,9 @@ "TARGET_SSE4_1" "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemul") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "<MODE>")]) (define_insn "sse4_1_movntdqa" @@ -8966,7 +9315,7 @@ UNSPEC_MOVNTDQA))] "TARGET_SSE4_1" "%vmovntdqa\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -8981,6 +9330,8 @@ "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sselog1") (set_attr "prefix" "vex") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "sse4_1_mpsadbw" @@ -8993,6 +9344,7 @@ "mpsadbw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "*avx_packusdw" @@ -9005,6 +9357,7 @@ "TARGET_AVX" "vpackusdw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") + 
(set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -9030,6 +9383,8 @@ "TARGET_AVX" "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -9055,6 +9410,8 @@ "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "sse4_1_pblendw" @@ -9067,6 +9424,7 @@ "pblendw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_insn "sse4_1_phminposuw" @@ -9478,6 +9836,7 @@ "TARGET_AVX" "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -9491,6 +9850,7 @@ "TARGET_AVX" "vptest\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -9515,6 +9875,8 @@ "TARGET_AVX" "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -9527,7 +9889,9 @@ "TARGET_ROUND" "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) @@ -9543,6 +9907,8 @@ "TARGET_AVX" "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -9558,7 +9924,9 @@ "TARGET_ROUND" "rounds<ssemodesuffixf2c>\t{%3, %2, 
%0|%0, %2, %3}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "<MODE>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -9593,7 +9961,7 @@ (match_dup 6)] UNSPEC_PCMPESTR))] "TARGET_SSE4_2 - && !(reload_completed || reload_in_progress)" + && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] @@ -9620,6 +9988,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9646,6 +10015,7 @@ (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") + (set_attr "length_immediate" "1") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9671,6 +10041,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9695,6 +10066,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -9719,7 +10091,7 @@ (match_dup 4)] UNSPEC_PCMPISTR))] "TARGET_SSE4_2 - && !(reload_completed || reload_in_progress)" + && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] @@ -9743,6 +10115,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9764,6 +10137,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9786,6 +10160,7 @@ [(set_attr 
"type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -9808,240 +10183,136 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; SSE5 instructions +;; XOP instructions ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SSE5 parallel integer multiply/add instructions. -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; -(define_insn "sse5_pmacsww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") +;; XOP parallel integer multiply/add instructions. +;; Note the XOP multiply/add instructions +;; a[i] = b[i] * c[i] + d[i]; +;; do not allow the value being added to be a memory operation. 
+(define_insn "xop_pmacsww" + [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI (mult:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") - (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:V8HI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V8HI 1 "nonimmediate_operand" "%x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (match_operand:V8HI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Split pmacsww with two memory operands into a load and the pmacsww. -(define_split - [(set (match_operand:V8HI 0 "register_operand" "") - (plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (match_operand:V8HI 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, V8HImode); - emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_insn "sse5_pmacssww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacssww" + [(set (match_operand:V8HI 0 "register_operand" "=x") (ss_plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V8HI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 
- "@ - pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (match_operand:V8HI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; -(define_insn "sse5_pmacsdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacsdd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (mult:V4SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Split pmacsdd with two memory operands into a load and the pmacsdd. 
-(define_split - [(set (match_operand:V4SI 0 "register_operand" "") - (plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (match_operand:V4SI 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, V4SImode); - emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_insn "sse5_pmacssdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacssdd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmacssdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacssdql" + [(set (match_operand:V2DI 0 "register_operand" "=x") (ss_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3)]))) - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" 
"x,m,x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 1) + (const_int 3)]))) + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmacssdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacssdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x") (ss_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmacsdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacsdql" + [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + 
(match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn_and_split "*sse5_pmacsdql_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3)])))) - (match_operand:V2DI 3 "memory_operand" "m,m,m")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))]) - -;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so ;; fake it with a multiply/add. 
In general, we expect the define_split to ;; occur before register allocation, so we have to handle the corner case where ;; the target is the same as operands 1/2 -(define_insn_and_split "sse5_mulv2div2di3_low" +(define_insn_and_split "xop_mulv2div2di3_low" [(set (match_operand:V2DI 0 "register_operand" "=&x") (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI @@ -10049,11 +10320,9 @@ (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)])))))] - "TARGET_SSE5" + "TARGET_XOP" "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" + "&& reload_completed" [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) @@ -10076,76 +10345,36 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmacsdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacsdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn_and_split "*sse5_pmacsdqh_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" 
"%x,x,m") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "memory_operand" "m,m,m")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))]) - -;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so ;; fake it with a multiply/add. 
In general, we expect the define_split to ;; occur before register allocation, so we have to handle the corner case where ;; the target is the same as either operands[1] or operands[2] -(define_insn_and_split "sse5_mulv2div2di3_high" +(define_insn_and_split "xop_mulv2div2di3_high" [(set (match_operand:V2DI 0 "register_operand" "=&x") (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI @@ -10153,11 +10382,9 @@ (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))))] - "TARGET_SSE5" + "TARGET_XOP" "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" + "&& reload_completed" [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) @@ -10180,76 +10407,70 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; SSE5 parallel integer multiply/add instructions for the intrinisics -(define_insn "sse5_pmacsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +;; XOP parallel integer multiply/add instructions for the intrinisics +(define_insn "xop_pmacsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - 
pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmacswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmacswd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmadcsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmadcsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10269,30 +10490,27 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 
3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "sse5_pmadcswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") +(define_insn "xop_pmadcswd" + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10312,32 +10530,35 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] + "TARGET_XOP" + "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; SSE5 parallel XMM conditional moves -(define_insn "sse5_pcmov_<mode>" - [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x") +;; XOP parallel XMM conditional moves +(define_insn "xop_pcmov_<mode>" + [(set (match_operand:SSEMODE 0 "register_operand" "=x,x") (if_then_else:SSEMODE - (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x") - (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0") - (match_operand:SSEMODE 
2 "vector_move_operand" "xm,x,x,xm")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "@ - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m") + (match_operand:SSEMODE 1 "vector_move_operand" "x,x") + (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))] + "TARGET_XOP" + "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) -;; SSE5 horizontal add/subtract instructions -(define_insn "sse5_phaddbw" +(define_insn "xop_pcmov_<mode>256" + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") + (if_then_else:AVX256MODE + (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m") + (match_operand:AVX256MODE 1 "vector_move_operand" "x,x") + (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))] + "TARGET_XOP" + "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +;; XOP horizontal add/subtract instructions +(define_insn "xop_phaddbw" [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI (sign_extend:V8HI @@ -10362,11 +10583,11 @@ (const_int 11) (const_int 13) (const_int 15)])))))] - "TARGET_SSE5" - "phaddbw\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddbw\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddbd" +(define_insn "xop_phaddbd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (plus:V4SI @@ -10399,11 +10620,11 @@ (const_int 7) (const_int 11) (const_int 15)]))))))] - "TARGET_SSE5" - "phaddbd\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddbd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddbq" +(define_insn "xop_phaddbq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI @@ -10452,11 +10673,11 @@ (match_dup 1) (parallel [(const_int 11) (const_int 15)])))))))] - "TARGET_SSE5" - "phaddbq\t{%1, %0|%0, %1}" + "TARGET_XOP" + 
"vphaddbq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddwd" +(define_insn "xop_phaddwd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (sign_extend:V4SI @@ -10473,11 +10694,11 @@ (const_int 3) (const_int 5) (const_int 7)])))))] - "TARGET_SSE5" - "phaddwd\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddwd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddwq" +(define_insn "xop_phaddwq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI @@ -10502,11 +10723,11 @@ (match_dup 1) (parallel [(const_int 3) (const_int 7)]))))))] - "TARGET_SSE5" - "phaddwq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddwq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phadddq" +(define_insn "xop_phadddq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (sign_extend:V2DI @@ -10519,11 +10740,11 @@ (match_dup 1) (parallel [(const_int 1) (const_int 3)])))))] - "TARGET_SSE5" - "phadddq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphadddq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddubw" +(define_insn "xop_phaddubw" [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI (zero_extend:V8HI @@ -10548,11 +10769,11 @@ (const_int 11) (const_int 13) (const_int 15)])))))] - "TARGET_SSE5" - "phaddubw\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddubw\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddubd" +(define_insn "xop_phaddubd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (plus:V4SI @@ -10585,11 +10806,11 @@ (const_int 7) (const_int 11) (const_int 15)]))))))] - "TARGET_SSE5" - "phaddubd\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddubd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddubq" +(define_insn "xop_phaddubq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI @@ -10638,11 +10859,11 @@ (match_dup 1) (parallel [(const_int 11) (const_int 15)])))))))] - 
"TARGET_SSE5" - "phaddubq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddubq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phadduwd" +(define_insn "xop_phadduwd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (zero_extend:V4SI @@ -10659,11 +10880,11 @@ (const_int 3) (const_int 5) (const_int 7)])))))] - "TARGET_SSE5" - "phadduwd\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphadduwd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phadduwq" +(define_insn "xop_phadduwq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI @@ -10688,11 +10909,11 @@ (match_dup 1) (parallel [(const_int 3) (const_int 7)]))))))] - "TARGET_SSE5" - "phadduwq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphadduwq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phaddudq" +(define_insn "xop_phaddudq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (zero_extend:V2DI @@ -10705,11 +10926,11 @@ (match_dup 1) (parallel [(const_int 1) (const_int 3)])))))] - "TARGET_SSE5" - "phaddudq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphaddudq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phsubbw" +(define_insn "xop_phsubbw" [(set (match_operand:V8HI 0 "register_operand" "=x") (minus:V8HI (sign_extend:V8HI @@ -10734,11 +10955,11 @@ (const_int 11) (const_int 13) (const_int 15)])))))] - "TARGET_SSE5" - "phsubbw\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphsubbw\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phsubwd" +(define_insn "xop_phsubwd" [(set (match_operand:V4SI 0 "register_operand" "=x") (minus:V4SI (sign_extend:V4SI @@ -10755,11 +10976,11 @@ (const_int 3) (const_int 5) (const_int 7)])))))] - "TARGET_SSE5" - "phsubwd\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphsubwd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "sse5_phsubdq" +(define_insn "xop_phsubdq" [(set (match_operand:V2DI 0 "register_operand" "=x") (minus:V2DI (sign_extend:V2DI @@ -10772,172 +10993,70 
@@ (match_dup 1) (parallel [(const_int 1) (const_int 3)])))))] - "TARGET_SSE5" - "phsubdq\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vphsubdq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -;; SSE5 permute instructions -(define_insn "sse5_pperm" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") +;; XOP permute instructions +(define_insn "xop_pperm" + [(set (match_operand:V16QI 0 "register_operand" "=x,x") (unspec:V16QI - [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") - (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] - UNSPEC_SSE5_PERMUTE))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) -;; The following are for the various unpack insns which doesn't need the first -;; source operand, so we can just use the output operand for the first operand. -;; This allows either of the other two operands to be a memory operand. We -;; can't just use the first operand as an argument to the normal pperm because -;; then an output only argument, suddenly becomes an input operand. 
-(define_insn "sse5_pperm_zero_v16qi_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V16QImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v16qi_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V16QImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_zero_v8hi_v4si" +;; XOP pack instructions that combine two vectors into a smaller vector +(define_insn "xop_pperm_pack_v2di_v4si" [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (zero_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V8HImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v8hi_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 
"nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V8HImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_zero_v4si_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V4SImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v4si_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V4SImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "mode" "TI")]) - -;; SSE5 pack instructions that combine two vectors into a smaller vector -(define_insn "sse5_pperm_pack_v2di_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x") (vec_concat:V4SI (truncate:V2SI - (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm")) + (match_operand:V2DI 1 "register_operand" "x,x")) (truncate:V2SI - (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" 
"xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) -(define_insn "sse5_pperm_pack_v4si_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") +(define_insn "xop_pperm_pack_v4si_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (truncate:V4HI - (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm")) + (match_operand:V4SI 1 "register_operand" "x,x")) (truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) -(define_insn "sse5_pperm_pack_v8hi_v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") +(define_insn "xop_pperm_pack_v8hi_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x,x") (vec_concat:V16QI (truncate:V8QI - (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm")) + (match_operand:V8HI 1 "register_operand" "x,x")) (truncate:V8QI - (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) -;; Floating point permutation (permps, 
permpd) -(define_insn "sse5_perm<mode>" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] - UNSPEC_SSE5_PERMUTE))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "<MODE>")]) - -;; SSE5 packed rotate instructions +;; XOP packed rotate instructions (define_expand "rotl<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "") (rotate:SSEMODE1248 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") (match_operand:SI 2 "general_operand")))] - "TARGET_SSE5" + "TARGET_XOP" { /* If we were given a scalar, convert it to parallel */ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) @@ -10958,7 +11077,7 @@ RTVEC_ELT (vs, i) = op2; emit_insn (gen_vec_init<mode> (reg, par)); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); DONE; } }) @@ -10968,7 +11087,7 @@ (rotatert:SSEMODE1248 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") (match_operand:SI 2 "general_operand")))] - "TARGET_SSE5" + "TARGET_XOP" { /* If we were given a scalar, convert it to parallel */ if (! 
const_0_to_<sserotatemax>_operand (operands[2], SImode)) @@ -10991,43 +11110,45 @@ emit_insn (gen_vec_init<mode> (reg, par)); emit_insn (gen_neg<mode>2 (neg, reg)); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); DONE; } }) -(define_insn "sse5_rotl<mode>3" +(define_insn "xop_rotl<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (rotate:SSEMODE1248 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] - "TARGET_SSE5" - "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_XOP" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "sse5_rotr<mode>3" + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_rotr<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (rotatert:SSEMODE1248 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] - "TARGET_SSE5" + "TARGET_XOP" { operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); - return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; + return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; } [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "vrotr<mode>3" [(match_operand:SSEMODE1248 0 "register_operand" "") (match_operand:SSEMODE1248 1 "register_operand" "") (match_operand:SSEMODE1248 2 "register_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtx reg = gen_reg_rtx (<MODE>mode); emit_insn (gen_neg<mode>2 (reg, operands[2])); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); DONE; }) @@ -11035,40 +11156,42 @@ [(match_operand:SSEMODE1248 0 "register_operand" "") (match_operand:SSEMODE1248 1 "register_operand" "") 
(match_operand:SSEMODE1248 2 "register_operand" "")] - "TARGET_SSE5" -{ - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_insn "sse5_vrotl<mode>3" + "TARGET_XOP" +{ + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_vrotl<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") (if_then_else:SSEMODE1248 (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") (const_int 0)) (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") (match_dup 2)) (rotatert:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -;; SSE5 packed shift instructions. + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; XOP packed shift instructions. 
;; FIXME: add V2DI back in (define_expand "vlshr<mode>3" [(match_operand:SSEMODE124 0 "register_operand" "") (match_operand:SSEMODE124 1 "register_operand" "") (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtx neg = gen_reg_rtx (<MODE>mode); emit_insn (gen_neg<mode>2 (neg, operands[2])); - emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg)); + emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg)); DONE; }) @@ -11076,11 +11199,11 @@ [(match_operand:SSEMODE124 0 "register_operand" "") (match_operand:SSEMODE124 1 "register_operand" "") (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtx neg = gen_reg_rtx (<MODE>mode); emit_insn (gen_neg<mode>2 (neg, operands[2])); - emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg)); + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg)); DONE; }) @@ -11088,52 +11211,56 @@ [(match_operand:SSEMODE124 0 "register_operand" "") (match_operand:SSEMODE124 1 "register_operand" "") (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" -{ - emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_insn "sse5_ashl<mode>3" + "TARGET_XOP" +{ + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_ashl<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") (if_then_else:SSEMODE1248 (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") (const_int 0)) (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") (match_dup 2)) (ashiftrt:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_XOP && !(MEM_P 
(operands[1]) && MEM_P (operands[2]))" + "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "sse5_lshl<mode>3" + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +(define_insn "xop_lshl<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") (if_then_else:SSEMODE1248 (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") (const_int 0)) (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") (match_dup 2)) (lshiftrt:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -;; SSE2 doesn't have some shift varients, so define versions for SSE5 + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; SSE2 doesn't have some shift varients, so define versions for XOP (define_expand "ashlv16qi3" [(match_operand:V16QI 0 "register_operand" "") (match_operand:V16QI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtvec vs = rtvec_alloc (16); rtx par = gen_rtx_PARALLEL (V16QImode, vs); @@ -11143,7 +11270,7 @@ RTVEC_ELT (vs, i) = operands[2]; emit_insn (gen_vec_initv16qi (reg, par)); - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); DONE; }) @@ -11151,7 +11278,7 @@ [(match_operand:V16QI 0 "register_operand" "") (match_operand:V16QI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" + 
"TARGET_XOP" { rtvec vs = rtvec_alloc (16); rtx par = gen_rtx_PARALLEL (V16QImode, vs); @@ -11161,7 +11288,7 @@ RTVEC_ELT (vs, i) = operands[2]; emit_insn (gen_vec_initv16qi (reg, par)); - emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg)); + emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg)); DONE; }) @@ -11169,13 +11296,13 @@ [(match_operand:V16QI 0 "register_operand" "") (match_operand:V16QI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtvec vs = rtvec_alloc (16); rtx par = gen_rtx_PARALLEL (V16QImode, vs); rtx reg = gen_reg_rtx (V16QImode); int i; - rtx ele = ((GET_CODE (operands[2]) == CONST_INT) + rtx ele = ((CONST_INT_P (operands[2])) ? GEN_INT (- INTVAL (operands[2])) : operands[2]); @@ -11184,14 +11311,14 @@ emit_insn (gen_vec_initv16qi (reg, par)); - if (GET_CODE (operands[2]) != CONST_INT) + if (!CONST_INT_P (operands[2])) { rtx neg = gen_reg_rtx (V16QImode); emit_insn (gen_negv16qi2 (neg, reg)); - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg)); } else - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); DONE; }) @@ -11200,14 +11327,14 @@ [(match_operand:V2DI 0 "register_operand" "") (match_operand:V2DI 1 "register_operand" "") (match_operand:DI 2 "nonmemory_operand" "")] - "TARGET_SSE5" + "TARGET_XOP" { rtvec vs = rtvec_alloc (2); rtx par = gen_rtx_PARALLEL (V2DImode, vs); rtx reg = gen_reg_rtx (V2DImode); rtx ele; - if (GET_CODE (operands[2]) == CONST_INT) + if (CONST_INT_P (operands[2])) ele = GEN_INT (- INTVAL (operands[2])); else if (GET_MODE (operands[2]) != DImode) { @@ -11225,25 +11352,24 @@ RTVEC_ELT (vs, 0) = ele; RTVEC_ELT (vs, 1) = ele; emit_insn (gen_vec_initv2di (reg, par)); - emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg)); - DONE; -}) - -;; SSE5 FRCZ support + emit_insn 
(gen_xop_ashlv2di3 (operands[0], operands[1], reg)); + DONE; +}) + +;; XOP FRCZ support ;; parallel insns -(define_insn "sse5_frcz<mode>2" +(define_insn "xop_frcz<mode>2" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") (unspec:SSEMODEF2P [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] UNSPEC_FRCZ))] - "TARGET_SSE5" - "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" + "TARGET_XOP" + "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt1") - (set_attr "prefix_extra" "1") (set_attr "mode" "<MODE>")]) ;; scalar insns -(define_insn "sse5_vmfrcz<mode>2" +(define_insn "xop_vmfrcz<mode>2" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") (vec_merge:SSEMODEF2P (unspec:SSEMODEF2P @@ -11251,163 +11377,89 @@ UNSPEC_FRCZ) (match_operand:SSEMODEF2P 1 "register_operand" "0") (const_int 1)))] - "TARGET_SSE5" - "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" + "TARGET_XOP" + "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt1") - (set_attr "prefix_extra" "1") (set_attr "mode" "<MODE>")]) -(define_insn "sse5_cvtph2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")] - UNSPEC_CVTPH2PS))] - "TARGET_SSE5" - "cvtph2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse5_cvtps2ph" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm") - (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_CVTPS2PH))] - "TARGET_SSE5" - "cvtps2ph\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -;; Scalar versions of the com instructions that use vector types that are -;; called from the intrinsics. Unlike the the other s{s,d} instructions, the -;; com instructions fill in 0's in the upper bits instead of leaving them -;; unmodified, so we use const_vector of 0 instead of match_dup. 
-(define_expand "sse5_vmmaskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")]) - (match_dup 4) - (const_int 1)))] - "TARGET_SSE5" -{ - operands[4] = CONST0_RTX (<MODE>mode); -}) - -(define_insn "*sse5_vmmaskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]) - (match_operand:SSEMODEF2P 4 "") - (const_int 1)))] - "TARGET_SSE5" - "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "<ssescalarmode>")]) - -;; We don't have a comparison operator that always returns true/false, so -;; handle comfalse and comtrue specially. 
-(define_insn "sse5_com_tf<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SSE5_TRUEFALSE))] - "TARGET_SSE5" -{ - const char *ret = NULL; - - switch (INTVAL (operands[3])) - { - case COM_FALSE_S: - ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_FALSE_P: - ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_TRUE_S: - ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_TRUE_P: - ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - default: - gcc_unreachable (); - } - - return ret; -} - [(set_attr "type" "ssecmp") +(define_insn "xop_frcz<mode>2256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x") + (unspec:FMA4MODEF4 + [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") (set_attr "mode" "<MODE>")]) -(define_insn "sse5_maskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "<MODE>")]) - -(define_insn "sse5_maskcmp<mode>3" +(define_insn "xop_maskcmp<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" [(match_operand:SSEMODE1248 2 "register_operand" "x") (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + "TARGET_XOP" + 
"vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sse4arg") - (set_attr "mode" "TI")]) - -(define_insn "sse5_maskcmp_uns<mode>3" + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_maskcmp_uns<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" [(match_operand:SSEMODE1248 2 "register_operand" "x") (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* ;; and pcomneu* not to be converted to the signed ones in case somebody needs ;; the exact instruction generated for the intrinsic. -(define_insn "sse5_maskcmp_uns2<mode>3" +(define_insn "xop_maskcmp_uns2<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (unspec:SSEMODE1248 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" [(match_operand:SSEMODE1248 2 "register_operand" "x") (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] - UNSPEC_SSE5_UNSIGNED_CMP))] - "TARGET_SSE5" - "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + UNSPEC_XOP_UNSIGNED_CMP))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) ;; Pcomtrue and pcomfalse support. These are useless instructions, but are ;; being added here to be complete. 
-(define_insn "sse5_pcom_tf<mode>3" +(define_insn "xop_pcom_tf<mode>3" [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x") (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SSE5_TRUEFALSE))] - "TARGET_SSE5" + UNSPEC_XOP_TRUEFALSE))] + "TARGET_XOP" { return ((INTVAL (operands[3]) != 0) - ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); + ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); } [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "*avx_aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") @@ -11416,6 +11468,7 @@ "TARGET_AES && TARGET_AVX" "vaesenc\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -11438,6 +11491,7 @@ "TARGET_AES && TARGET_AVX" "vaesenclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -11460,6 +11514,7 @@ "TARGET_AES && TARGET_AVX" "vaesdec\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -11482,6 +11537,7 @@ "TARGET_AES && TARGET_AVX" "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -11516,6 +11572,7 @@ "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set_attr 
"length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11528,6 +11585,8 @@ "TARGET_PCLMUL && TARGET_AVX" "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) @@ -11541,6 +11600,7 @@ "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) (define_expand "avx_vzeroall" @@ -11565,68 +11625,211 @@ (define_insn "*avx_vzeroall" [(match_parallel 0 "vzeroall_operation" - [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL) - (set (match_operand 1 "register_operand" "=x") - (match_operand 2 "const0_operand" "X"))])] + [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] "TARGET_AVX" "vzeroall" [(set_attr "type" "sse") + (set_attr "modrm" "0") (set_attr "memory" "none") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) ;; vzeroupper clobbers the upper 128bits of AVX registers. -(define_insn "avx_vzeroupper" - [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) - (clobber (reg:V8SI XMM0_REG)) - (clobber (reg:V8SI XMM1_REG)) - (clobber (reg:V8SI XMM2_REG)) - (clobber (reg:V8SI XMM3_REG)) - (clobber (reg:V8SI XMM4_REG)) - (clobber (reg:V8SI XMM5_REG)) - (clobber (reg:V8SI XMM6_REG)) - (clobber (reg:V8SI XMM7_REG))] - "TARGET_AVX && !TARGET_64BIT" +(define_expand "avx_vzeroupper" + [(match_par_dup 0 [(const_int 0)])] + "TARGET_AVX" +{ + int nregs = TARGET_64BIT ? 
16 : 8; + int regno; + + operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); + + XVECEXP (operands[0], 0, 0) + = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), + UNSPECV_VZEROUPPER); + + for (regno = 0; regno < nregs; regno++) + XVECEXP (operands[0], 0, regno + 1) + = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (V8SImode, SSE_REGNO (regno))); +}) + +(define_insn "*avx_vzeroupper" + [(match_parallel 0 "vzeroupper_operation" + [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] + "TARGET_AVX" "vzeroupper" [(set_attr "type" "sse") + (set_attr "modrm" "0") (set_attr "memory" "none") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "avx_vzeroupper_rex64" - [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) - (clobber (reg:V8SI XMM0_REG)) - (clobber (reg:V8SI XMM1_REG)) - (clobber (reg:V8SI XMM2_REG)) - (clobber (reg:V8SI XMM3_REG)) - (clobber (reg:V8SI XMM4_REG)) - (clobber (reg:V8SI XMM5_REG)) - (clobber (reg:V8SI XMM6_REG)) - (clobber (reg:V8SI XMM7_REG)) - (clobber (reg:V8SI XMM8_REG)) - (clobber (reg:V8SI XMM9_REG)) - (clobber (reg:V8SI XMM10_REG)) - (clobber (reg:V8SI XMM11_REG)) - (clobber (reg:V8SI XMM12_REG)) - (clobber (reg:V8SI XMM13_REG)) - (clobber (reg:V8SI XMM14_REG)) - (clobber (reg:V8SI XMM15_REG))] - "TARGET_AVX && TARGET_64BIT" - "vzeroupper" - [(set_attr "type" "sse") - (set_attr "memory" "none") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "avx_vpermil<mode>" +(define_insn_and_split "vec_dup<mode>" + [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x") + (vec_duplicate:AVX256MODE24P + (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))] + "TARGET_AVX" + "@ + vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1} + #" + "&& reload_completed && REG_P (operands[1])" + [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1))) + (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] +{ + operands[2] = gen_rtx_REG 
(<avxhalfvecmode>mode, REGNO (operands[0])); +} + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + +(define_insn "avx_vbroadcastf128_<mode>" + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") + (vec_concat:AVX256MODE + (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x") + (match_dup 1)))] + "TARGET_AVX" + "@ + vbroadcastf128\t{%1, %0|%0, %1} + vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1} + vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" + [(set_attr "type" "ssemov,sselog1,sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "0,1,1") + (set_attr "prefix" "vex") + (set_attr "mode" "V4SF,V8SF,V8SF")]) + +;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. +;; If it so happens that the input is in memory, use vbroadcast. +;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). +(define_insn "*avx_vperm_broadcast_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + (vec_select:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") + (match_parallel 2 "avx_vbroadcast_operand" + [(match_operand 3 "const_int_operand" "C,n,n")])))] + "TARGET_AVX" +{ + int elt = INTVAL (operands[3]); + switch (which_alternative) + { + case 0: + case 1: + operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); + return "vbroadcastss\t{%1, %0|%0, %1}"; + case 2: + operands[2] = GEN_INT (elt * 0x55); + return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "0,0,1") + (set_attr "prefix" "vex") + (set_attr "mode" "SF,SF,V4SF")]) + +(define_insn_and_split "*avx_vperm_broadcast_<mode>" + [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x") + (vec_select:AVX256MODEF2P + (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x") + (match_parallel 2 
"avx_vbroadcast_operand" + [(match_operand 3 "const_int_operand" "C,n,n")])))] + "TARGET_AVX" + "#" + "&& reload_completed" + [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))] +{ + rtx op0 = operands[0], op1 = operands[1]; + int elt = INTVAL (operands[3]); + + if (REG_P (op1)) + { + int mask; + + /* Shuffle element we care about into all elements of the 128-bit lane. + The other lane gets shuffled too, but we don't care. */ + if (<MODE>mode == V4DFmode) + mask = (elt & 1 ? 15 : 0); + else + mask = (elt & 3) * 0x55; + emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); + + /* Shuffle the lane we care about into both lanes of the dest. */ + mask = (elt / (<ssescalarnum> / 2)) * 0x11; + emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); + DONE; + } + + operands[1] = adjust_address_nv (op1, <avxscalarmode>mode, + elt * GET_MODE_SIZE (<avxscalarmode>mode)); +}) + +(define_expand "avx_vpermil<mode>" + [(set (match_operand:AVXMODEFDP 0 "register_operand" "") + (vec_select:AVXMODEFDP + (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_255_operand" "")))] + "TARGET_AVX" +{ + int mask = INTVAL (operands[2]); + rtx perm[<ssescalarnum>]; + + perm[0] = GEN_INT (mask & 1); + perm[1] = GEN_INT ((mask >> 1) & 1); + if (<MODE>mode == V4DFmode) + { + perm[2] = GEN_INT (((mask >> 2) & 1) + 2); + perm[3] = GEN_INT (((mask >> 3) & 1) + 2); + } + + operands[2] + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); +}) + +(define_expand "avx_vpermil<mode>" + [(set (match_operand:AVXMODEFSP 0 "register_operand" "") + (vec_select:AVXMODEFSP + (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_0_to_255_operand" "")))] + "TARGET_AVX" +{ + int mask = INTVAL (operands[2]); + rtx perm[<ssescalarnum>]; + + perm[0] = GEN_INT (mask & 3); + perm[1] = GEN_INT ((mask >> 2) & 3); + perm[2] = GEN_INT ((mask >> 4) & 3); + perm[3] = GEN_INT ((mask >> 6) & 3); + if 
(<MODE>mode == V8SFmode) + { + perm[4] = GEN_INT ((mask & 3) + 4); + perm[5] = GEN_INT (((mask >> 2) & 3) + 4); + perm[6] = GEN_INT (((mask >> 4) & 3) + 4); + perm[7] = GEN_INT (((mask >> 6) & 3) + 4); + } + + operands[2] + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); +}) + +(define_insn "*avx_vpermilp<mode>" [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "xm") - (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")] - UNSPEC_VPERMIL))] - "TARGET_AVX" - "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") + (vec_select:AVXMODEF2P + (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm") + (match_parallel 2 "avx_vpermilp_<mode>_operand" + [(match_operand 3 "const_int_operand" "")])))] + "TARGET_AVX" +{ + int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; + operands[2] = GEN_INT (mask); + return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -11639,10 +11842,48 @@ "TARGET_AVX" "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) -(define_insn "avx_vperm2f128<mode>3" +(define_expand "avx_vperm2f128<mode>3" + [(set (match_operand:AVX256MODE2P 0 "register_operand" "") + (unspec:AVX256MODE2P + [(match_operand:AVX256MODE2P 1 "register_operand" "") + (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "") + (match_operand:SI 3 "const_0_to_255_operand" "")] + UNSPEC_VPERMIL2F128))] + "TARGET_AVX" +{ + /* The imm8 lane selector is operand 3; operand 2 is the second source vector (register or memory), so it must not be read with INTVAL. */ + int mask = INTVAL (operands[3]); + if ((mask & 0x88) == 0) + { + /* Neither zeroing bit (7 or 3) is set, so the shuffle is a pure lane selection: emit it as a vec_select over the concatenation of both inputs. Otherwise fall through and let the unspec pattern above be emitted unchanged. */ + rtx perm[<ssescalarnum>], t1, t2; + int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; + + base = (mask & 3) * nelt2; + for (i = 0; i < nelt2; ++i) + perm[i] = GEN_INT (base + 
i); + + base = ((mask >> 4) & 3) * nelt2; + for (i = 0; i < nelt2; ++i) + perm[i + nelt2] = GEN_INT (base + i); + + t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode, + operands[1], operands[2]); + t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); + t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); + t2 = gen_rtx_SET (VOIDmode, operands[0], t2); + emit_insn (t2); + DONE; + } +}) + +;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which +;; means that in order to represent this properly in rtl we'd have to +;; nest *another* vec_concat with a zero operand and do the select from +;; a 4x wide vector. That doesn't seem very nice. +(define_insn "*avx_vperm2f128<mode>_full" [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") (unspec:AVX256MODE2P [(match_operand:AVX256MODE2P 1 "register_operand" "x") @@ -11652,57 +11893,30 @@ "TARGET_AVX" "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>" - [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x") - (vec_concat:AVXMODEF4P - (vec_concat:<avxhalfvecmode> - (match_operand:<avxscalarmode> 1 "memory_operand" "m") - (match_dup 1)) - (vec_concat:<avxhalfvecmode> - (match_dup 1) - (match_dup 1))))] - "TARGET_AVX" - "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "<avxscalarmode>")]) - -(define_insn "avx_vbroadcastss256" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (vec_concat:V8SF - (vec_concat:V4SF - (vec_concat:V2SF - (match_operand:SF 1 "memory_operand" "m") - (match_dup 1)) - (vec_concat:V2SF - (match_dup 1) - (match_dup 1))) - (vec_concat:V4SF - (vec_concat:V2SF - (match_dup 1) - (match_dup 1)) - (vec_concat:V2SF - (match_dup 1) - (match_dup 1)))))] - "TARGET_AVX" - "vbroadcastss\t{%1, %0|%0, %1}" - 
[(set_attr "type" "ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "SF")]) - -(define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x") - (vec_concat:AVX256MODEF2P - (match_operand:<avxhalfvecmode> 1 "memory_operand" "m") - (match_dup 1)))] - "TARGET_AVX" - "vbroadcastf128\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "vex") - (set_attr "mode" "V4SF")]) +(define_insn "*avx_vperm2f128<mode>_nozero" + [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") + (vec_select:AVX256MODE2P + (vec_concat:<ssedoublesizemode> + (match_operand:AVX256MODE2P 1 "register_operand" "x") + (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) + (match_parallel 3 "avx_vperm2f128_<mode>_operand" + [(match_operand 4 "const_int_operand" "")])))] + "TARGET_AVX" +{ + int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1; + operands[3] = GEN_INT (mask); + return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) (define_expand "avx_vinsertf128<mode>" [(match_operand:AVX256MODE 0 "register_operand" "") @@ -11737,6 +11951,8 @@ "TARGET_AVX" "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11750,6 +11966,8 @@ "TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11764,6 +11982,8 @@ "TARGET_AVX" "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11778,6 +11998,8 @@ 
"TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11794,6 +12016,8 @@ "TARGET_AVX" "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11810,6 +12034,8 @@ "TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11830,6 +12056,8 @@ "TARGET_AVX" "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11850,6 +12078,8 @@ "TARGET_AVX" "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -11863,6 +12093,7 @@ "TARGET_AVX" "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -11876,6 +12107,7 @@ "TARGET_AVX" "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) @@ -11986,5 +12218,7 @@ } } [(set_attr "type" "sselog,ssemov") + (set_attr "prefix_extra" "1,*") + (set_attr "length_immediate" "1,*") (set_attr "prefix" "vex") (set_attr "mode" "<avxvecmode>")])