Mercurial > hg > CbC > CbC_gcc
diff gcc/config/rs6000/altivec.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/config/rs6000/altivec.md Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/config/rs6000/altivec.md Fri Oct 27 22:46:09 2017 +0900 @@ -1,6 +1,5 @@ ;; AltiVec patterns. -;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 -;; Free Software Foundation, Inc. +;; Copyright (C) 2002-2017 Free Software Foundation, Inc. ;; Contributed by Aldy Hernandez (aldy@quesejoda.com) ;; This file is part of GCC. @@ -19,162 +18,230 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_constants - ;; 51-62 deleted - [(UNSPEC_VCMPBFP 64) - (UNSPEC_VMSUMU 65) - (UNSPEC_VMSUMM 66) - (UNSPEC_VMSUMSHM 68) - (UNSPEC_VMSUMUHS 69) - (UNSPEC_VMSUMSHS 70) - (UNSPEC_VMHADDSHS 71) - (UNSPEC_VMHRADDSHS 72) - (UNSPEC_VMLADDUHM 73) - (UNSPEC_VADDCUW 75) - (UNSPEC_VADDU 76) - (UNSPEC_VADDS 77) - (UNSPEC_VAVGU 80) - (UNSPEC_VAVGS 81) - (UNSPEC_VMULEUB 83) - (UNSPEC_VMULESB 84) - (UNSPEC_VMULEUH 85) - (UNSPEC_VMULESH 86) - (UNSPEC_VMULOUB 87) - (UNSPEC_VMULOSB 88) - (UNSPEC_VMULOUH 89) - (UNSPEC_VMULOSH 90) - (UNSPEC_VPKUHUM 93) - (UNSPEC_VPKUWUM 94) - (UNSPEC_VPKPX 95) - (UNSPEC_VPKSHSS 97) - (UNSPEC_VPKSWSS 99) - (UNSPEC_VPKUHUS 100) - (UNSPEC_VPKSHUS 101) - (UNSPEC_VPKUWUS 102) - (UNSPEC_VPKSWUS 103) - ;; 104 deleted - (UNSPEC_VSLV4SI 110) - (UNSPEC_VSLO 111) - (UNSPEC_VSR 118) - (UNSPEC_VSRO 119) - (UNSPEC_VSUBCUW 124) - (UNSPEC_VSUBU 125) - (UNSPEC_VSUBS 126) - (UNSPEC_VSUM4UBS 131) - (UNSPEC_VSUM4S 132) - (UNSPEC_VSUM2SWS 134) - (UNSPEC_VSUMSWS 135) - (UNSPEC_VPERM 144) - (UNSPEC_VPERM_UNS 145) - ;; 148 deleted - (UNSPEC_VRFIN 149) - ;; 150 deleted - (UNSPEC_VCFUX 151) - (UNSPEC_VCFSX 152) - (UNSPEC_VCTUXS 153) - (UNSPEC_VCTSXS 154) - (UNSPEC_VLOGEFP 155) - (UNSPEC_VEXPTEFP 156) - ;; 157-162 deleted - (UNSPEC_VLSDOI 163) - (UNSPEC_VUPKHSB 167) - (UNSPEC_VUPKHPX 168) - (UNSPEC_VUPKHSH 169) - (UNSPEC_VUPKLSB 170) - (UNSPEC_VUPKLPX 171) - (UNSPEC_VUPKLSH 172) - ;; 173 deleted - (UNSPEC_DST 190) - (UNSPEC_DSTT 191) - (UNSPEC_DSTST 
192) - (UNSPEC_DSTSTT 193) - (UNSPEC_LVSL 194) - (UNSPEC_LVSR 195) - (UNSPEC_LVE 196) - (UNSPEC_STVX 201) - (UNSPEC_STVXL 202) - (UNSPEC_STVE 203) - (UNSPEC_SET_VSCR 213) - (UNSPEC_GET_VRSAVE 214) - (UNSPEC_LVX 215) - (UNSPEC_REDUC_PLUS 217) - (UNSPEC_VECSH 219) - (UNSPEC_EXTEVEN_V4SI 220) - (UNSPEC_EXTEVEN_V8HI 221) - (UNSPEC_EXTEVEN_V16QI 222) - (UNSPEC_EXTEVEN_V4SF 223) - (UNSPEC_EXTODD_V4SI 224) - (UNSPEC_EXTODD_V8HI 225) - (UNSPEC_EXTODD_V16QI 226) - (UNSPEC_EXTODD_V4SF 227) - (UNSPEC_INTERHI_V4SI 228) - (UNSPEC_INTERHI_V8HI 229) - (UNSPEC_INTERHI_V16QI 230) - ;; delete 231 - (UNSPEC_INTERLO_V4SI 232) - (UNSPEC_INTERLO_V8HI 233) - (UNSPEC_INTERLO_V16QI 234) - ;; delete 235 - (UNSPEC_LVLX 236) - (UNSPEC_LVLXL 237) - (UNSPEC_LVRX 238) - (UNSPEC_LVRXL 239) - (UNSPEC_STVLX 240) - (UNSPEC_STVLXL 241) - (UNSPEC_STVRX 242) - (UNSPEC_STVRXL 243) - (UNSPEC_VMULWHUB 308) - (UNSPEC_VMULWLUB 309) - (UNSPEC_VMULWHSB 310) - (UNSPEC_VMULWLSB 311) - (UNSPEC_VMULWHUH 312) - (UNSPEC_VMULWLUH 313) - (UNSPEC_VMULWHSH 314) - (UNSPEC_VMULWLSH 315) - (UNSPEC_VUPKHUB 316) - (UNSPEC_VUPKHUH 317) - (UNSPEC_VUPKLUB 318) - (UNSPEC_VUPKLUH 319) - (UNSPEC_VPERMSI 320) - (UNSPEC_VPERMHI 321) - (UNSPEC_INTERHI 322) - (UNSPEC_INTERLO 323) - (UNSPEC_VUPKHS_V4SF 324) - (UNSPEC_VUPKLS_V4SF 325) - (UNSPEC_VUPKHU_V4SF 326) - (UNSPEC_VUPKLU_V4SF 327) +(define_c_enum "unspec" + [UNSPEC_VCMPBFP + UNSPEC_VMSUMU + UNSPEC_VMSUMM + UNSPEC_VMSUMSHM + UNSPEC_VMSUMUHS + UNSPEC_VMSUMSHS + UNSPEC_VMHADDSHS + UNSPEC_VMHRADDSHS + UNSPEC_VADDCUW + UNSPEC_VADDU + UNSPEC_VADDS + UNSPEC_VAVGU + UNSPEC_VAVGS + UNSPEC_VMULEUB + UNSPEC_VMULESB + UNSPEC_VMULEUH + UNSPEC_VMULESH + UNSPEC_VMULEUW + UNSPEC_VMULESW + UNSPEC_VMULOUB + UNSPEC_VMULOSB + UNSPEC_VMULOUH + UNSPEC_VMULOSH + UNSPEC_VMULOUW + UNSPEC_VMULOSW + UNSPEC_VPKPX + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT + UNSPEC_VREVEV + UNSPEC_VSLV4SI + UNSPEC_VSLO + 
UNSPEC_VSR + UNSPEC_VSRO + UNSPEC_VSUBCUW + UNSPEC_VSUBU + UNSPEC_VSUBS + UNSPEC_VSUM4UBS + UNSPEC_VSUM4S + UNSPEC_VSUM2SWS + UNSPEC_VSUMSWS + UNSPEC_VPERM + UNSPEC_VPERMR + UNSPEC_VPERM_UNS + UNSPEC_VRFIN + UNSPEC_VCFUX + UNSPEC_VCFSX + UNSPEC_VCTUXS + UNSPEC_VCTSXS + UNSPEC_VLOGEFP + UNSPEC_VEXPTEFP + UNSPEC_VSLDOI + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN + UNSPEC_VUNPACK_HI_SIGN_DIRECT + UNSPEC_VUNPACK_LO_SIGN_DIRECT + UNSPEC_VUPKHPX + UNSPEC_VUPKLPX + UNSPEC_CONVERT_4F32_8I16 + UNSPEC_DARN + UNSPEC_DARN_32 + UNSPEC_DARN_RAW + UNSPEC_DST + UNSPEC_DSTT + UNSPEC_DSTST + UNSPEC_DSTSTT + UNSPEC_LVSL + UNSPEC_LVSR + UNSPEC_LVE + UNSPEC_STVX + UNSPEC_STVXL + UNSPEC_STVE + UNSPEC_SET_VSCR + UNSPEC_GET_VRSAVE + UNSPEC_LVX + UNSPEC_REDUC_PLUS + UNSPEC_VECSH + UNSPEC_EXTEVEN_V4SI + UNSPEC_EXTEVEN_V8HI + UNSPEC_EXTEVEN_V16QI + UNSPEC_EXTEVEN_V4SF + UNSPEC_EXTODD_V4SI + UNSPEC_EXTODD_V8HI + UNSPEC_EXTODD_V16QI + UNSPEC_EXTODD_V4SF + UNSPEC_INTERHI_V4SI + UNSPEC_INTERHI_V8HI + UNSPEC_INTERHI_V16QI + UNSPEC_INTERLO_V4SI + UNSPEC_INTERLO_V8HI + UNSPEC_INTERLO_V16QI + UNSPEC_LVLX + UNSPEC_LVLXL + UNSPEC_LVRX + UNSPEC_LVRXL + UNSPEC_STVLX + UNSPEC_STVLXL + UNSPEC_STVRX + UNSPEC_STVRXL + UNSPEC_VADU + UNSPEC_VSLV + UNSPEC_VSRV + UNSPEC_VMULWHUB + UNSPEC_VMULWLUB + UNSPEC_VMULWHSB + UNSPEC_VMULWLSB + UNSPEC_VMULWHUH + UNSPEC_VMULWLUH + UNSPEC_VMULWHSH + UNSPEC_VMULWLSH + UNSPEC_VUPKHUB + UNSPEC_VUPKHUH + UNSPEC_VUPKLUB + UNSPEC_VUPKLUH + UNSPEC_VPERMSI + UNSPEC_VPERMHI + UNSPEC_INTERHI + UNSPEC_INTERLO + UNSPEC_VUPKHS_V4SF + UNSPEC_VUPKLS_V4SF + UNSPEC_VUPKHU_V4SF + UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD + UNSPEC_VMRGH_DIRECT + UNSPEC_VMRGL_DIRECT + UNSPEC_VSPLT_DIRECT + UNSPEC_VMRGEW_DIRECT + UNSPEC_VMRGOW_DIRECT + UNSPEC_VSUMSWS_DIRECT + UNSPEC_VADDCUQ + UNSPEC_VADDEUQM + UNSPEC_VADDECUQ + UNSPEC_VSUBCUQ + UNSPEC_VSUBEUQM + UNSPEC_VSUBECUQ + UNSPEC_VBPERMQ + UNSPEC_VBPERMD + UNSPEC_BCDADD + UNSPEC_BCDSUB + UNSPEC_BCD_OVERFLOW + UNSPEC_CMPRB + UNSPEC_CMPRB2 + UNSPEC_CMPEQB + 
UNSPEC_VRLMI + UNSPEC_VRLNM ]) -(define_constants - [(UNSPECV_SET_VRSAVE 30) - (UNSPECV_MTVSCR 186) - (UNSPECV_MFVSCR 187) - (UNSPECV_DSSALL 188) - (UNSPECV_DSS 189) +(define_c_enum "unspecv" + [UNSPECV_SET_VRSAVE + UNSPECV_MTVSCR + UNSPECV_MFVSCR + UNSPECV_DSSALL + UNSPECV_DSS ]) -;; Vec int modes -(define_mode_iterator VI [V4SI V8HI V16QI]) -;; Short vec in modes +;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) +;; Short vec int modes (define_mode_iterator VIshort [V8HI V16QI]) +;; Longer vec int modes for rotate/mask ops +(define_mode_iterator VIlong [V2DI V4SI]) ;; Vec float modes (define_mode_iterator VF [V4SF]) ;; Vec modes, pity mode iterators are not composable (define_mode_iterator V [V4SI V8HI V16QI V4SF]) ;; Vec modes for move/logical/permute ops, include vector types for move not ;; otherwise handled by altivec (v2df, v2di, ti) -(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI]) +(define_mode_iterator VM [V4SI + V8HI + V16QI + V4SF + V2DF + V2DI + V1TI + TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) ;; Like VM, except don't do TImode -(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) - -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_iterator VM2 [V4SI + V8HI + V16QI + V4SF + V2DF + V2DI + V1TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +;; Map the Vector convert single precision to double precision for integer +;; versus floating point +(define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")]) + +;; Specific iterator for parity which does not have a byte/half-word form, but +;; does have a quad word form +(define_mode_iterator VParity [V4SI + V2DI + V1TI + TI]) + +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") 
(V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)") + (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) + +;; Vector negate +(define_mode_iterator VNEG [V4SI V2DI]) ;; Vector move instructions. (define_insn "*altivec_mov<mode>" - [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*o,*r,*r,v,v") - (match_operand:VM2 1 "input_operand" "v,Z,v,r,o,r,j,W"))] + [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,?Y,?*r,?*r,v,v,?*r") + (match_operand:VM2 1 "input_operand" "v,Z,v,*r,Y,*r,j,W,W"))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -189,17 +256,19 @@ case 5: return "#"; case 6: return "vxor %0,%0,%0"; case 7: return output_vec_const_move (operands); + case 8: return "#"; default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*,*") + (set_attr "length" "4,4,4,20,20,20,4,8,32")]) ;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode ;; is for unions. 
However for plain data movement, slightly favor the vector ;; loads (define_insn "*altivec_movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,v,v,?o,?r,?r,v,v") - (match_operand:TI 1 "input_operand" "v,Z,v,r,o,r,j,W"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,v,v,?Y,?r,?r,v,v") + (match_operand:TI 1 "input_operand" "v,Z,v,r,Y,r,j,W"))] "VECTOR_MEM_ALTIVEC_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" @@ -217,7 +286,7 @@ default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*")]) ;; Load up a vector with the most significant bit set by loading up -1 and ;; doing a shift left @@ -228,7 +297,7 @@ [(const_int 0)] { rtx dest = operands[0]; - enum machine_mode mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[0]); rtvec v; int i, num_elements; @@ -243,8 +312,8 @@ for (i = 0; i < num_elements; i++) RTVEC_ELT (v, i) = constm1_rtx; - emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v))); - emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_ASHIFT (mode, dest, dest))); + emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v))); + emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest))); DONE; }) @@ -257,7 +326,7 @@ { rtx dup = gen_easy_altivec_constant (operands[1]); rtx const_vec; - enum machine_mode op_mode = <MODE>mode; + machine_mode op_mode = <MODE>mode; /* Divide the operand of the resulting VEC_DUPLICATE, and use simplify_rtx to make a CONST_VECTOR. 
*/ @@ -277,9 +346,50 @@ operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]); }) +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_vsldoi" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) + (unspec:VM [(match_dup 2) + (match_dup 4) + (match_dup 6)] + UNSPEC_VSLDOI))] +{ + rtx op1 = operands[1]; + int elt = (BYTES_BIG_ENDIAN) ? 0 : GET_MODE_NUNITS (<MODE>mode) - 1; + HOST_WIDE_INT val = const_vector_elt_as_int (op1, elt); + rtx rtx_val = GEN_INT (val); + int shift = vspltis_shifted (op1); + int nunits = GET_MODE_NUNITS (<MODE>mode); + int i; + + gcc_assert (shift != 0); + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, rtvec_alloc (nunits)); + operands[4] = gen_reg_rtx (<MODE>mode); + + if (shift < 0) + { + operands[5] = CONSTM1_RTX (<MODE>mode); + operands[6] = GEN_INT (-shift); + } + else + { + operands[5] = CONST0_RTX (<MODE>mode); + operands[6] = GEN_INT (shift); + } + + /* Populate the constant vectors. 
*/ + for (i = 0; i < nunits; i++) + XVECEXP (operands[3], 0, i) = rtx_val; +}) + (define_insn "get_vrsave_internal" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))] + (unspec:SI [(reg:SI VRSAVE_REGNO)] UNSPEC_GET_VRSAVE))] "TARGET_ALTIVEC" { if (TARGET_MACHO) @@ -291,9 +401,9 @@ (define_insn "*set_vrsave_internal" [(match_parallel 0 "vrsave_operation" - [(set (reg:SI 109) + [(set (reg:SI VRSAVE_REGNO) (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") - (reg:SI 109)] UNSPECV_SET_VRSAVE))])] + (reg:SI VRSAVE_REGNO)] UNSPECV_SET_VRSAVE))])] "TARGET_ALTIVEC" { if (TARGET_MACHO) @@ -305,7 +415,7 @@ (define_insn "*save_world" [(match_parallel 0 "save_world_operation" - [(clobber (reg:SI 65)) + [(clobber (reg:SI LR_REGNO)) (use (match_operand:SI 1 "call_operand" "s"))])] "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT" "bl %z1" @@ -315,27 +425,89 @@ (define_insn "*restore_world" [(match_parallel 0 "restore_world_operation" [(return) - (use (reg:SI 65)) + (use (reg:SI LR_REGNO)) (use (match_operand:SI 1 "call_operand" "s")) (clobber (match_operand:SI 2 "gpc_reg_operand" "=r"))])] "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT" "b %z1") +;; The save_vregs and restore_vregs patterns don't use memory_operand +;; because (plus (reg) (const_int)) is not a valid vector address. +;; This way is more compact than describing exactly what happens in +;; the out-of-line functions, ie. loading the constant into r11/r12 +;; then using indexed addressing, and requires less editing of rtl +;; to describe the operation to dwarf2out_frame_debug_expr. 
+(define_insn "*save_vregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 11)) + (use (reg:P 0)) + (set (mem:V4SI (plus:P (match_operand:P 2 "gpc_reg_operand" "b") + (match_operand:P 3 "short_cint_operand" "I"))) + (match_operand:V4SI 4 "altivec_register_operand" "v"))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_vregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 12)) + (use (reg:P 0)) + (set (mem:V4SI (plus:P (match_operand:P 2 "gpc_reg_operand" "b") + (match_operand:P 3 "short_cint_operand" "I"))) + (match_operand:V4SI 4 "altivec_register_operand" "v"))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_vregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 11)) + (use (reg:P 0)) + (set (match_operand:V4SI 2 "altivec_register_operand" "=v") + (mem:V4SI (plus:P (match_operand:P 3 "gpc_reg_operand" "b") + (match_operand:P 4 "short_cint_operand" "I"))))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_vregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 12)) + (use (reg:P 0)) + (set (match_operand:V4SI 2 "altivec_register_operand" "=v") + (mem:V4SI (plus:P (match_operand:P 3 "gpc_reg_operand" "b") + (match_operand:P 4 "short_cint_operand" "I"))))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + ;; Simple binary operations. 
;; add (define_insn "add<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vaddu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_addv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=v") (plus:V4SF (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")))] + (match_operand:V4SF 2 "register_operand" "v")))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vaddfp %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -345,17 +517,17 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vaddu<VI_char>s" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] + (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "<VI_unit>" "vaddu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -364,17 +536,17 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vadds<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - 
(minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsubu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -391,7 +563,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -400,8 +572,8 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -410,8 +582,8 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubs<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -430,7 +602,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vavgs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -439,33 +611,33 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - 
(eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpequ<VI_char> %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_gt<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgts<VI_char> %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_gtu<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgtu<VI_char> %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_eqv4sf" [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") @@ -495,23 +667,23 @@ [(set (match_operand:VM 0 "altivec_register_operand" "=v") (if_then_else:VM (ne:CC (match_operand:VM 1 "altivec_register_operand" "v") - (const_int 0)) + (match_operand:VM 4 "zero_constant" "")) (match_operand:VM 2 "altivec_register_operand" "v") (match_operand:VM 3 "altivec_register_operand" "v")))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vsel %0,%3,%2,%1" - [(set_attr "type" "vecperm")]) + [(set_attr "type" 
"vecmove")]) (define_insn "*altivec_vsel<mode>_uns" [(set (match_operand:VM 0 "altivec_register_operand" "=v") (if_then_else:VM (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v") - (const_int 0)) + (match_operand:VM 4 "zero_constant" "")) (match_operand:VM 2 "altivec_register_operand" "v") (match_operand:VM 3 "altivec_register_operand" "v")))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vsel %0,%3,%2,%1" - [(set_attr "type" "vecperm")]) + [(set_attr "type" "vecmove")]) ;; Fused multiply add. @@ -554,75 +726,79 @@ ;; [(set (match_operand:V4SI 0 "register_operand" "=v") ;; (mult:V4SI (match_operand:V4SI 1 "register_operand" "v") ;; (match_operand:V4SI 2 "register_operand" "v")))] +(define_insn "mulv4si3_p8" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (mult:V4SI (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vmuluwm %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_expand "mulv4si3" [(use (match_operand:V4SI 0 "register_operand" "")) (use (match_operand:V4SI 1 "register_operand" "")) (use (match_operand:V4SI 2 "register_operand" ""))] "TARGET_ALTIVEC" - " - { - rtx zero; - rtx swap; - rtx small_swap; - rtx sixteen; - rtx one; - rtx two; - rtx low_product; - rtx high_product; +{ + rtx zero; + rtx swap; + rtx small_swap; + rtx sixteen; + rtx one; + rtx two; + rtx low_product; + rtx high_product; - zero = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + if (TARGET_P8_VECTOR) + { + emit_insn (gen_mulv4si3_p8 (operands[0], operands[1], operands[2])); + DONE; + } + + zero = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); - sixteen = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vspltisw (sixteen, gen_rtx_CONST_INT (V4SImode, -16))); - - swap = gen_reg_rtx (V4SImode); - emit_insn (gen_vrotlv4si3 (swap, operands[2], sixteen)); + sixteen = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (sixteen, 
gen_rtx_CONST_INT (V4SImode, -16))); - one = gen_reg_rtx (V8HImode); - convert_move (one, operands[1], 0); + swap = gen_reg_rtx (V4SImode); + emit_insn (gen_vrotlv4si3 (swap, operands[2], sixteen)); + + one = gen_reg_rtx (V8HImode); + convert_move (one, operands[1], 0); - two = gen_reg_rtx (V8HImode); - convert_move (two, operands[2], 0); + two = gen_reg_rtx (V8HImode); + convert_move (two, operands[2], 0); - small_swap = gen_reg_rtx (V8HImode); - convert_move (small_swap, swap, 0); + small_swap = gen_reg_rtx (V8HImode); + convert_move (small_swap, swap, 0); - low_product = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmulouh (low_product, one, two)); + low_product = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vmulouh (low_product, one, two)); - high_product = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); + high_product = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); - emit_insn (gen_vashlv4si3 (high_product, high_product, sixteen)); + emit_insn (gen_vashlv4si3 (high_product, high_product, sixteen)); - emit_insn (gen_addv4si3 (operands[0], high_product, low_product)); + emit_insn (gen_addv4si3 (operands[0], high_product, low_product)); - DONE; - }") + DONE; +}) (define_expand "mulv8hi3" [(use (match_operand:V8HI 0 "register_operand" "")) (use (match_operand:V8HI 1 "register_operand" "")) (use (match_operand:V8HI 2 "register_operand" ""))] "TARGET_ALTIVEC" - " { - rtx odd = gen_reg_rtx (V4SImode); - rtx even = gen_reg_rtx (V4SImode); - rtx high = gen_reg_rtx (V4SImode); - rtx low = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2])); - emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2])); - - emit_insn (gen_altivec_vmrghw (high, even, odd)); - emit_insn (gen_altivec_vmrglw (low, even, odd)); - - emit_insn (gen_altivec_vpkuwum (operands[0], high, low)); - - DONE; -}") + rtx zero = gen_reg_rtx (V8HImode); 
+ + emit_insn (gen_altivec_vspltish (zero, const0_rtx)); + emit_insn (gen_altivec_vmladduhm(operands[0], operands[1], operands[2], zero)); + + DONE; +}) ;; Fused multiply subtract (define_insn "*altivec_vnmsubfp" @@ -672,7 +848,7 @@ (match_operand:V8HI 2 "register_operand" "v") (match_operand:V4SI 3 "register_operand" "v")] UNSPEC_VMSUMUHS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vmsumuhs %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) @@ -683,7 +859,7 @@ (match_operand:V8HI 2 "register_operand" "v") (match_operand:V4SI 3 "register_operand" "v")] UNSPEC_VMSUMSHS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vmsumshs %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) @@ -691,18 +867,18 @@ ;; max (define_insn "umax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -715,18 +891,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - 
(match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vminu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmins<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -744,7 +920,7 @@ (match_operand:V8HI 2 "register_operand" "v") (match_operand:V8HI 3 "register_operand" "v")] UNSPEC_VMHADDSHS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vmhaddshs %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) @@ -755,223 +931,607 @@ (match_operand:V8HI 2 "register_operand" "v") (match_operand:V8HI 3 "register_operand" "v")] UNSPEC_VMHRADDSHS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vmhraddshs %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) (define_insn "altivec_vmladduhm" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v") - (match_operand:V8HI 3 "register_operand" "v")] - UNSPEC_VMLADDUHM))] + (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (match_operand:V8HI 3 "register_operand" "v")))] "TARGET_ALTIVEC" "vmladduhm %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vmrghb" +(define_expand 
"altivec_vmrghb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") - (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 8) - (const_int 1) - (const_int 9) - (const_int 2) - (const_int 10) - (const_int 3) - (const_int 11) - (const_int 4) - (const_int 12) - (const_int 5) - (const_int 13) - (const_int 6) - (const_int 14) - (const_int 7) - (const_int 15)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v") - (parallel [(const_int 8) - (const_int 0) - (const_int 9) - (const_int 1) - (const_int 10) - (const_int 2) - (const_int 11) - (const_int 3) - (const_int 12) - (const_int 4) - (const_int 13) - (const_int 5) - (const_int 14) - (const_int 6) - (const_int 15) - (const_int 7)])) - (const_int 21845)))] + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 
"register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghb %0,%1,%2"; + else + return "vmrglb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] "TARGET_ALTIVEC" "vmrghb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrghh" +(define_expand "altivec_vmrghh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghh_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") - (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghh %0,%1,%2"; + else + return "vmrglh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] "TARGET_ALTIVEC" "vmrghh %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrghw" +(define_expand "altivec_vmrghw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 
1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" "vmrghw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vmrghsf" [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - 
(vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" - "vmrghw %0,%1,%2" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglb" +(define_expand "altivec_vmrglb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") - (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v") - (parallel [(const_int 8) - (const_int 0) - (const_int 9) - (const_int 1) - (const_int 10) - (const_int 2) - (const_int 
11) - (const_int 3) - (const_int 12) - (const_int 4) - (const_int 13) - (const_int 5) - (const_int 14) - (const_int 6) - (const_int 15) - (const_int 7)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 8) - (const_int 1) - (const_int 9) - (const_int 2) - (const_int 10) - (const_int 3) - (const_int 11) - (const_int 4) - (const_int 12) - (const_int 5) - (const_int 13) - (const_int 6) - (const_int 14) - (const_int 7) - (const_int 15)])) - (const_int 21845)))] + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglb %0,%1,%2"; + else + return "vmrghb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] "TARGET_ALTIVEC" "vmrglb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglh" +(define_expand "altivec_vmrglh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglh_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") - (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (const_int 85)))] + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglh %0,%1,%2"; + else + return "vmrghh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] "TARGET_ALTIVEC" "vmrglh %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglw" +(define_expand "altivec_vmrglw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use 
(match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vmrglsf" [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 
3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" - "vmrglw %0,%1,%2" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +;; Power8 vector merge two V4SF/V4SI even words to V4SF +(define_insn "p8_vmrgew_<mode>" + [(set (match_operand:VSX_W 0 "register_operand" "=v") + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "v") + (match_operand:VSX_W 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgew %0,%1,%2"; + else + return "vmrgow %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgow %0,%1,%2"; + else + return "vmrgew %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgew_<mode>_direct" + [(set (match_operand:VSX_W 0 "register_operand" "=v") + (unspec:VSX_W [(match_operand:VSX_W 1 "register_operand" "v") + (match_operand:VSX_W 2 "register_operand" "v")] + UNSPEC_VMRGEW_DIRECT))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow_<mode>_direct" + [(set (match_operand:VSX_W 0 "register_operand" "=v") + (unspec:VSX_W [(match_operand:VSX_W 1 
"register_operand" "v") + (match_operand:VSX_W 2 "register_operand" "v")] + UNSPEC_VMRGOW_DIRECT))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_expand "vec_widen_umult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_even_v4si" + [(use (match_operand:V2DI 0 "register_operand")) + (use 
(match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_P8_VECTOR" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v4si" + [(use (match_operand:V2DI 0 "register_operand")) + (use (match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_P8_VECTOR" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], 
operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v4si" + [(use (match_operand:V2DI 0 "register_operand")) + (use (match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_P8_VECTOR" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulouw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleuw (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v4si" + [(use (match_operand:V2DI 0 "register_operand")) + (use (match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_P8_VECTOR" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosw (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesw (operands[0], operands[1], operands[2])); + DONE; +}) + (define_insn "altivec_vmuleub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -981,6 +1541,15 @@ "vmuleub %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuloub" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULOUB))] + "TARGET_ALTIVEC" + "vmuloub %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulesb" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -990,6 +1559,15 @@ "vmulesb %0,%1,%2" 
[(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulosb" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULOSB))] + "TARGET_ALTIVEC" + "vmulosb %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmuleuh" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") @@ -999,6 +1577,15 @@ "vmuleuh %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulouh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULOUH))] + "TARGET_ALTIVEC" + "vmulouh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulesh" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") @@ -1008,33 +1595,6 @@ "vmulesh %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vmuloub" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")] - UNSPEC_VMULOUB))] - "TARGET_ALTIVEC" - "vmuloub %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_insn "altivec_vmulosb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")] - UNSPEC_VMULOSB))] - "TARGET_ALTIVEC" - "vmulosb %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_insn "altivec_vmulouh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VMULOUH))] - "TARGET_ALTIVEC" - "vmulouh %0,%1,%2" - [(set_attr "type" "veccomplex")]) - (define_insn "altivec_vmulosh" [(set 
(match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") @@ -1044,152 +1604,160 @@ "vmulosh %0,%1,%2" [(set_attr "type" "veccomplex")]) - -;; logical ops. Have the logical ops follow the memory ops in -;; terms of whether to prefer VSX or Altivec - -(define_insn "*altivec_and<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vand %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_ior<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_xor<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (xor:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vxor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_one_cmpl<mode>2" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vnor %0,%1,%1" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_nor<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vnor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_andc<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (not:VM (match_operand:VM 2 "register_operand" "v")) - (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vandc %0,%1,%2" - [(set_attr "type" "vecsimple")]) - 
-(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - +(define_insn "altivec_vmuleuw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULEUW))] + "TARGET_P8_VECTOR" + "vmuleuw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulouw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOUW))] + "TARGET_P8_VECTOR" + "vmulouw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulesw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULESW))] + "TARGET_P8_VECTOR" + "vmulesw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulosw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOSW))] + "TARGET_P8_VECTOR" + "vmulosw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] 
UNSPEC_VPKPX))] "TARGET_ALTIVEC" - "vpkpx %0,%1,%2" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpkpx %0,%1,%2\"; + else + return \"vpkpx %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" +(define_insn "altivec_vpks<VI_char>ss" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>ss %0,%1,%2\"; + else + return \"vpks<VI_char>ss %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" +(define_insn "altivec_vpks<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>us %0,%1,%2\"; + else + return \"vpks<VI_char>us %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" - 
[(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>us %0,%1,%2\"; + else + return \"vpku<VI_char>us %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um_direct" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 
"register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT))] + "<VI_unit>" + "* + { + if (BYTES_BIG_ENDIAN) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vrl<VI_char>mi" + [(set (match_operand:VIlong 0 "register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0") + (match_operand:VIlong 2 "register_operand" "v") + (match_operand:VIlong 3 "register_operand" "v")] + UNSPEC_VRLMI))] + "TARGET_P9_VECTOR" + "vrl<VI_char>mi %0,%2,%3" + [(set_attr "type" "veclogical")]) + +(define_insn "altivec_vrl<VI_char>nm" + [(set (match_operand:VIlong 0 "register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") + (match_operand:VIlong 2 "register_operand" "v")] + UNSPEC_VRLNM))] + "TARGET_P9_VECTOR" + "vrl<VI_char>nm %0,%1,%2" + [(set_attr "type" "veclogical")]) + (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -1208,27 +1776,45 @@ "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "vslv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSLV))] + "TARGET_P9_VECTOR" + "vslv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vsrv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI 
[(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSRV))] + "TARGET_P9_VECTOR" + "vsrv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1255,7 +1841,7 @@ (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUM4UBS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vsum4ubs %0,%1,%2" [(set_attr "type" "veccomplex")]) @@ -1265,69 +1851,257 @@ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") 
(match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUM4S)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vsum4s<VI_char>s %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vsum2sws" +(define_expand "altivec_vsum2sws" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vsum2sws_direct (operands[0], operands[1], + operands[2])); + else + { + rtx tmp1 = gen_reg_rtx (V4SImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vsldoi_v4si (tmp1, operands[2], + operands[2], GEN_INT (12))); + emit_insn (gen_altivec_vsum2sws_direct (tmp2, operands[1], tmp1)); + emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2, + GEN_INT (4))); + } + DONE; +}) + +; FIXME: This can probably be expressed without an UNSPEC. 
+(define_insn "altivec_vsum2sws_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUM2SWS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vsum2sws %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_expand "altivec_vsumsws" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V4SI 1 "register_operand")) + (use (match_operand:V4SI 2 "register_operand"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vsumsws_direct (operands[0], operands[1], + operands[2])); + else + { + rtx tmp1 = gen_reg_rtx (V4SImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (tmp1, operands[2], const0_rtx)); + emit_insn (gen_altivec_vsumsws_direct (tmp2, operands[1], tmp1)); + emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2, + GEN_INT (12))); + } + DONE; +}) + +; FIXME: This can probably be expressed without an UNSPEC. 
+(define_insn "altivec_vsumsws_direct" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VSUM2SWS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vsum2sws %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_insn "altivec_vsumsws" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VSUMSWS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + UNSPEC_VSUMSWS_DIRECT)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vsumsws %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vspltb" +(define_expand "altivec_vspltb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (QImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V16QImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_duplicate:V16QI (vec_select:QI (match_operand:V16QI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + return "vspltb %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vspltb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vsplth" +(define_expand "altivec_vsplth" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (HImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V8HImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vsplth_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_duplicate:V8HI (vec_select:HI (match_operand:V8HI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + return "vsplth %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vsplth_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vsplth %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vspltw" +(define_expand "altivec_vspltw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_duplicate:V4SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vspltw %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vspltsf" +(define_expand "altivec_vspltsf" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SFmode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SFmode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltsf_internal" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_duplicate:V4SF (vec_select:SF (match_operand:V4SF 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" - "vspltw %0,%1,%2" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} [(set_attr "type" "vecperm")]) (define_insn "altivec_vspltis<VI_char>" @@ -1340,30 +2114,120 @@ (define_insn "*altivec_vrfiz" [(set (match_operand:V4SF 0 "register_operand" "=v") - (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] + (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vperm_<mode>" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] +(define_expand "altivec_vperm_<mode>" + [(set (match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +;; Slightly prefer vperm, since the target does not overlap the source +(define_insn "*altivec_vperm_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "altivec_vperm_v8hiv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=v,?wo") + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vperm_<mode>_uns" - 
[(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_expand "altivec_vperm_<mode>_uns" + [(set (match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM_UNS))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_insn "*altivec_vperm_<mode>_uns_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_expand "vec_permv16qi" + [(set (match_operand:V16QI 0 "register_operand" "") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN) { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_expand "vec_perm_constv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "" "")] + "TARGET_ALTIVEC" +{ + if (altivec_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_insn "*altivec_vpermr_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 
"register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMR))] + "TARGET_P9_VECTOR" + "@ + vpermr %0,%2,%1,%3 + xxpermr %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) (define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") @@ -1412,7 +2276,7 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_VCTUXS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vctuxs %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -1422,7 +2286,7 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_VCTSXS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vctsxs %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -1475,7 +2339,7 @@ RTVEC_ELT (v, 2) = GEN_INT (mask_val); RTVEC_ELT (v, 3) = GEN_INT (mask_val); - emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v))); + emit_insn (gen_vec_initv4sisi (mask, gen_rtx_PARALLEL (V4SImode, v))); emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2], gen_lowpart (V4SFmode, mask))); DONE; @@ -1486,99 +2350,119 @@ (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:QI 3 "immediate_operand" "i")] - UNSPEC_VLSDOI))] + UNSPEC_VSLDOI))] "TARGET_ALTIVEC" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") 
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhs<VU_char> %0,%1"; + else + return "vupkls<VU_char> %0,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vupkhs<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "vupkhs<VU_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkls<VU_char> %0,%1"; + else + return "vupkhs<VU_char> %0,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vupkls<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "vupkls<VU_char> %0,%1" [(set_attr "type" "vecperm")]) (define_insn "altivec_vupkhpx" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKHPX))] "TARGET_ALTIVEC" - "vupkhpx %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhpx %0,%1"; + else + return "vupklpx %0,%1"; +} 
[(set_attr "type" "vecperm")]) (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKLPX))] "TARGET_ALTIVEC" - "vupklpx %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - "vupklsh %0,%1" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupklpx %0,%1"; + else + return "vupkhpx %0,%1"; +} [(set_attr "type" "vecperm")]) ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ<VI_char>_p" - [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpequ<VI_char>. 
%0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpgts<VI_char>_p" - [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgts<VI_char>. %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpgtu<VI_char>_p" - [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgtu<VI_char>. 
%0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpeqfp_p" - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v"))] UNSPEC_PREDICATE)) @@ -1590,7 +2474,7 @@ [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtfp_p" - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(gt:CC (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v"))] UNSPEC_PREDICATE)) @@ -1602,7 +2486,7 @@ [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgefp_p" - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(ge:CC (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v"))] UNSPEC_PREDICATE)) @@ -1614,7 +2498,7 @@ [(set_attr "type" "veccmp")]) (define_insn "altivec_vcmpbfp_p" - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP)) @@ -1627,7 +2511,7 @@ [(set_attr "type" "veccmp")]) (define_insn "altivec_mtvscr" - [(set (reg:SI 110) + [(set (reg:SI VSCR_REGNO) (unspec_volatile:SI [(match_operand:V4SI 0 "register_operand" "v")] UNSPECV_MTVSCR))] "TARGET_ALTIVEC" @@ -1636,7 +2520,7 @@ (define_insn "altivec_mfvscr" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec_volatile:V8HI [(reg:SI 110)] UNSPECV_MFVSCR))] + (unspec_volatile:V8HI [(reg:SI VSCR_REGNO)] UNSPECV_MFVSCR))] "TARGET_ALTIVEC" "mfvscr %0" [(set_attr "type" "vecsimple")]) @@ -1686,16 +2570,84 @@ "dststt %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_lvsl" +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], 
constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_reg" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSL_REG))] + "TARGET_ALTIVEC" + "lvsl %0,0,%1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] UNSPEC_LVSL))] + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] + UNSPEC_LVSL))] "TARGET_ALTIVEC" "lvsl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvsr" +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "altivec_register_operand")) + (use (match_operand:V16QI 1 "memory_operand"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_reg" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSR_REG))] + "TARGET_ALTIVEC" + "lvsr %0,0,%1" + [(set_attr "type" "vecload")]) + 
+(define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] UNSPEC_LVSR))] + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] + UNSPEC_LVSR))] "TARGET_ALTIVEC" "lvsr %0,%y1" [(set_attr "type" "vecload")]) @@ -1713,8 +2665,7 @@ addr = XEXP (operands[1], 0); temp = gen_reg_rtx (GET_MODE (addr)); - emit_insn (gen_rtx_SET (VOIDmode, temp, - gen_rtx_NEG (GET_MODE (addr), addr))); + emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (GET_MODE (addr), addr))); emit_insn (gen_altivec_lvsr (operands[0], replace_equiv_address (operands[1], temp))); DONE; @@ -1723,7 +2674,21 @@ ;; Parallel some of the LVE* and STV*'s with unspecs because some have ;; identical rtl but different instructions-- and gcc gets confused. -(define_insn "altivec_lve<VI_char>x" +(define_expand "altivec_lve<VI_char>x" + [(parallel + [(set (match_operand:VI 0 "register_operand" "=v") + (match_operand:VI 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVE)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVE); + DONE; + } +}) + +(define_insn "*altivec_lve<VI_char>x_internal" [(parallel [(set (match_operand:VI 0 "register_operand" "=v") (match_operand:VI 1 "memory_operand" "Z")) @@ -1741,16 +2706,33 @@ "lvewx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvxl" +(define_expand "altivec_lvxl_<mode>" [(parallel - [(set (match_operand:V4SI 0 "register_operand" "=v") - (match_operand:V4SI 1 "memory_operand" "Z")) + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_SET_VSCR); + DONE; + } +}) + +(define_insn "*altivec_lvxl_<mode>_internal" + [(parallel + [(set 
(match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] "TARGET_ALTIVEC" "lvxl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvx_<mode>" +; This version of lvx is used only in cases where we need to force an lvx +; over any other load, and we don't care about losing CSE opportunities. +; Its primary use is for prologue register saves. +(define_insn "altivec_lvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "register_operand" "=v") (match_operand:VM2 1 "memory_operand" "Z")) @@ -1759,7 +2741,46 @@ "lvx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_stvx_<mode>" +; The next two patterns embody what lvx should usually look like. +(define_insn "altivec_lvx_<mode>_2op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvx_<mode>_1op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; 32-bit versions of the above. 
+(define_insn "altivec_lvx_<mode>_2op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvx_<mode>_1op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; This version of stvx is used only in cases where we need to force an stvx +; over any other store, and we don't care about losing CSE opportunities. +; Its primary use is for epilogue register restores. +(define_insn "altivec_stvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "memory_operand" "=Z") (match_operand:VM2 1 "register_operand" "v")) @@ -1768,16 +2789,78 @@ "stvx %1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "altivec_stvxl" +; The next two patterns embody what stvx should usually look like. +(define_insn "altivec_stvx_<mode>_2op" + [(set (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op" + [(set (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + +; 32-bit versions of the above. 
+(define_insn "altivec_stvx_<mode>_2op_si" + [(set (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op_si" + [(set (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + +(define_expand "altivec_stvxl_<mode>" [(parallel - [(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVXL)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVXL); + DONE; + } +}) + +(define_insn "*altivec_stvxl_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVXL)])] "TARGET_ALTIVEC" "stvxl %1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "altivec_stve<VI_char>x" +(define_expand "altivec_stve<VI_char>x" + [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") + (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvex_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVE); + DONE; + } +}) + +(define_insn "*altivec_stve<VI_char>x_internal" [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] "TARGET_ALTIVEC" @@ -1791,21 +2874,406 @@ "stvewx %1,%y0" [(set_attr "type" "vecstore")]) 
+;; Generate doublee +;; signed int/float to double convert words 0 and 2 +(define_expand "doublee<mode>2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:VSX_W 1 "register_operand" "v"))] + "TARGET_VSX" +{ + machine_mode op_mode = GET_MODE (operands[1]); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + Input words 0 and 2 are where they need to be. */ + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], operands[1])); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + take (operand[1] operand[1]) and shift left one word + 3 2 1 0 3 2 1 0 => 2 1 0 3 + Input words 2 and 0 are now where they need to be for the + conversion. */ + rtx rtx_tmp; + rtx rtx_val = GEN_INT (1); + + rtx_tmp = gen_reg_rtx (op_mode); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate unsdoublee +;; unsigned int to double convert words 0 and 2 +(define_expand "unsdoubleev4si2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:V4SI 1 "register_operand" "v"))] + "TARGET_VSX" +{ + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + Input words 0 and 2 are where they need to be. */ + emit_insn (gen_vsx_xvcvuxwdp (operands[0], operands[1])); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + take (operand[1] operand[1]) and shift left one word + 3 2 1 0 3 2 1 0 => 2 1 0 3 + Input words 2 and 0 are now where they need to be for the + conversion. 
*/ + rtx rtx_tmp; + rtx rtx_val = GEN_INT (1); + + rtx_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate doubleov +;; signed int/float to double convert words 1 and 3 +(define_expand "doubleo<mode>2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:VSX_W 1 "register_operand" "v"))] + "TARGET_VSX" +{ + machine_mode op_mode = GET_MODE (operands[1]); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + take (operand[1] operand[1]) and shift left one word + 0 1 2 3 0 1 2 3 => 1 2 3 0 + Input words 1 and 3 are now where they need to be for the + conversion. */ + rtx rtx_tmp; + rtx rtx_val = GEN_INT (1); + + rtx_tmp = gen_reg_rtx (op_mode); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + Input words 3 and 1 are where they need to be. */ + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], operands[1])); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate unsdoubleov +;; unsigned int to double convert words 1 and 3 +(define_expand "unsdoubleov4si2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:V4SI 1 "register_operand" "v"))] + "TARGET_VSX" +{ + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + take (operand[1] operand[1]) and shift left one word + 0 1 2 3 0 1 2 3 => 1 2 3 0 + Input words 1 and 3 are now where they need to be for the + conversion. 
*/ + rtx rtx_tmp; + rtx rtx_val = GEN_INT (1); + + rtx_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + else + { + /* Want to convert the words 1 and 3. + Little endian word numbering for operand is 3 2 1 0. + Input words 3 and 1 are where they need to be. */ + emit_insn (gen_vsx_xvcvuxwdp (operands[0], operands[1])); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate doublehv +;; signed int/float to double convert words 0 and 1 +(define_expand "doubleh<mode>2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:VSX_W 1 "register_operand" "v"))] + "TARGET_VSX" +{ + rtx rtx_tmp; + rtx rtx_val; + + machine_mode op_mode = GET_MODE (operands[1]); + rtx_tmp = gen_reg_rtx (op_mode); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + Shift operand left one word, rtx_tmp word order is now 1 2 3 0. + take (rts_tmp operand[1]) and shift left three words + 1 2 3 0 0 1 2 3 => 0 0 1 2 + Input words 0 and 1 are now where they need to be for the + conversion. */ + rtx_val = GEN_INT (1); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, rtx_tmp, + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + Shift operand left three words, rtx_tmp word order is now 0 3 2 1. + take (operand[1] rts_tmp) and shift left two words + 3 2 1 0 0 3 2 1 => 1 0 0 3 + Input words 0 and 1 are now where they need to be for the + conversion. 
*/ + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (2); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + rtx_tmp, rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate unsdoublehv +;; unsigned int to double convert words 0 and 1 +(define_expand "unsdoublehv4si2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:V4SI 1 "register_operand" "v"))] + "TARGET_VSX" +{ + rtx rtx_tmp = gen_reg_rtx (V4SImode); + rtx rtx_val = GEN_INT (12); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for words in operand is 0 1 2 3. + Shift operand left one word, rtx_tmp word order is now 1 2 3 0. + take (rts_tmp operand[1]) and shift left three words + 1 2 3 0 0 1 2 3 => 0 0 1 2 + Input words 0 and 1 are now where they need to be for the + conversion. */ + rtx_val = GEN_INT (1); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, rtx_tmp, + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + Shift operand left three words, rtx_tmp word order is now 0 3 2 1. + take (operand[1] rts_tmp) and shift left two words + 3 2 1 0 0 3 2 1 => 1 0 0 3 + Input words 1 and 0 are now where they need to be for the + conversion. 
*/ + rtx_val = GEN_INT (3); + + rtx_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (2); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + rtx_tmp, rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate doublelv +;; signed int/float to double convert words 2 and 3 +(define_expand "doublel<mode>2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:VSX_W 1 "register_operand" "v"))] + "TARGET_VSX" +{ + rtx rtx_tmp; + rtx rtx_val = GEN_INT (3); + + machine_mode op_mode = GET_MODE (operands[1]); + rtx_tmp = gen_reg_rtx (op_mode); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for operand is 0 1 2 3. + Shift operand left three words, rtx_tmp word order is now 3 0 1 2. + take (operand[1] rtx_tmp) and shift left two words + 0 1 2 3 3 0 1 2 => 2 3 3 0 + now use convert instruction to convert word 2 and 3 in the + input vector. */ + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (2); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + rtx_tmp, rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + Shift operand left one word, rtx_tmp word order is now 2 1 0 3. + take (rtx_tmp operand[1]) and shift left three words + 2 1 0 3 3 2 1 0 => 3 3 2 1 + now use convert instruction to convert word 3 and 2 in the + input vector. 
*/ + rtx_val = GEN_INT (1); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_<mode> (rtx_tmp, rtx_tmp, + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcv<VS_sxwsp>dp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate unsdoublelv +;; unsigned int to double convert convert 2 and 3 +(define_expand "unsdoublelv4si2" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (match_operand:V4SI 1 "register_operand" "v"))] + "TARGET_VSX" +{ + rtx rtx_tmp = gen_reg_rtx (V4SImode); + rtx rtx_val = GEN_INT (12); + + if (VECTOR_ELT_ORDER_BIG) + { + /* Big endian word numbering for operand is 0 1 2 3. + Shift operand left three words, rtx_tmp word order is now 3 0 1 2. + take (operand[1] rtx_tmp) and shift left two words + 0 1 2 3 3 0 1 2 => 2 3 3 0 + now use convert instruction to convert word 2 and 3 in the + input vector. */ + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + operands[1], rtx_val)); + + rtx_val = GEN_INT (2); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, operands[1], + rtx_tmp, rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + else + { + /* Little endian word numbering for operand is 3 2 1 0. + Shift operand left one word, rtx_tmp word order is now 2 1 0 3. + take (rtx_tmp operand[1]) and shift left three words + 2 1 0 3 3 2 1 0 => 3 3 2 1 + now use convert instruction to convert word 3 and 2 in the + input vector. 
*/ + rtx_val = GEN_INT (1); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, + operands[1], operands[1], rtx_val)); + + rtx_val = GEN_INT (3); + emit_insn (gen_vsx_xxsldwi_v4si (rtx_tmp, rtx_tmp, + operands[1], rtx_val)); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], rtx_tmp)); + } + DONE; +} + [(set_attr "type" "veccomplex")]) + +;; Generate two vector F32 converted to packed vector I16 vector +(define_expand "convert_4f32_8i16" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_CONVERT_4F32_8I16))] + "TARGET_P9_VECTOR" +{ + rtx rtx_tmp_hi = gen_reg_rtx (V4SImode); + rtx rtx_tmp_lo = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vctuxs (rtx_tmp_hi, operands[1], const0_rtx)); + emit_insn (gen_altivec_vctuxs (rtx_tmp_lo, operands[2], const0_rtx)); + emit_insn (gen_altivec_vpkswss (operands[0], rtx_tmp_hi, rtx_tmp_lo)); + DONE; +}) + ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? %0,%1,SCRATCH2" (define_expand "abs<mode>2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. 
*/ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); +}) + +;; Generate +;; vspltisw SCRATCH1,0 +;; vsubu?m SCRATCH2,SCRATCH1,%1 +;; vmins? %0,%1,SCRATCH2" +(define_expand "nabs<mode>2" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" +{ + int i; + int n_elt = GET_MODE_NUNITS (<MODE>mode); + + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); }) ;; Generate @@ -1836,7 +3304,8 @@ (unspec:VI [(match_dup 2) (match_operand:VI 1 "register_operand" "v")] UNSPEC_VSUBS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]) + (set (reg:SI VSCR_REGNO) + (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]) (set (match_operand:VI 0 "register_operand" "=v") (smax:VI (match_dup 1) (match_dup 3)))] "TARGET_ALTIVEC" @@ -1845,62 +3314,47 @@ operands[3] = gen_reg_rtx (GET_MODE (operands[0])); }) -(define_insn "altivec_vsumsws_nomode" - [(set (match_operand 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VSUMSWS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vsumsws %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_expand "reduc_splus_<mode>" - [(set (match_operand:VIshort 0 "register_operand" "=v") +(define_expand "reduc_plus_scal_<mode>" + [(set (match_operand:<VI_scalar> 0 "register_operand" "=v") (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 
UNSPEC_REDUC_PLUS))] "TARGET_ALTIVEC" - " -{ +{ rtx vzero = gen_reg_rtx (V4SImode); rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (<MODE>mode); + rtx dest = gen_lowpart (V4SImode, vtmp2); + int elt = VECTOR_ELT_ORDER_BIG ? GET_MODE_NUNITS (<MODE>mode) - 1 : 0; emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); - emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero)); + emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); + rs6000_expand_vector_extract (operands[0], vtmp2, GEN_INT (elt)); DONE; -}") - -(define_expand "reduc_uplus_v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_REDUC_PLUS))] - "TARGET_ALTIVEC" - " -{ - rtx vzero = gen_reg_rtx (V4SImode); - rtx vtmp1 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); - emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero)); - emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero)); - DONE; -}") +}) + +(define_insn "*p9_neg<mode>2" + [(set (match_operand:VNEG 0 "altivec_register_operand" "=v") + (neg:VNEG (match_operand:VNEG 1 "altivec_register_operand" "v")))] + "TARGET_P9_VECTOR" + "vneg<VI_char> %0,%1" + [(set_attr "type" "vecsimple")]) (define_expand "neg<mode>2" - [(use (match_operand:VI 0 "register_operand" "")) - (use (match_operand:VI 1 "register_operand" ""))] - "TARGET_ALTIVEC" - " + [(set (match_operand:VI2 0 "register_operand" "") + (neg:VI2 (match_operand:VI2 1 "register_operand" "")))] + "<VI_unit>" { - rtx vzero; - - vzero = gen_reg_rtx (GET_MODE (operands[0])); - emit_insn (gen_altivec_vspltis<VI_char> (vzero, const0_rtx)); - emit_insn (gen_sub<mode>3 (operands[0], vzero, operands[1])); - - DONE; -}") + if (!TARGET_P9_VECTOR || (<MODE>mode != V4SImode && <MODE>mode != V2DImode)) + { + rtx vzero; + + vzero = gen_reg_rtx (GET_MODE (operands[0])); + 
emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); + emit_insn (gen_sub<mode>3 (operands[0], vzero, operands[1])); + DONE; + } +}) (define_expand "udot_prod<mode>" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -1973,69 +3427,45 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "") + +(define_expand "vec_unpacks_lo_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "") (define_insn "vperm_v8hiv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V4SI 2 
"register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:V4SI 0 "register_operand" "=v,?wo") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V4SI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMSI))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) (define_insn "vperm_v16qiv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:V8HI 0 "register_operand" "=v,?wo") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMHI))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) (define_expand "vec_unpacku_hi_v16qi" @@ -2048,27 +3478,28 @@ rtx vzero = gen_reg_rtx (V8HImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16); - 
RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7); - - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 
7 : 16); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); DONE; }") @@ -2083,27 +3514,28 @@ rtx vzero = gen_reg_rtx (V4SImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7); - - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 
4 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); DONE; }") @@ -2118,27 +3550,28 @@ rtx vzero = gen_reg_rtx (V8HImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15); - - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 
10 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); DONE; }") @@ -2153,27 +3586,28 @@ rtx vzero = gen_reg_rtx (V4SImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15); - - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 
16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); DONE; }") @@ -2189,9 +3623,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2206,9 +3649,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); - 
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2223,9 +3675,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2240,9 +3701,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, 
ve)); + } DONE; }") @@ -2257,9 +3727,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2274,9 +3753,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2291,9 +3779,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn 
(gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2308,33 +3805,53 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); - emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } DONE; }") -(define_expand "vec_pack_trunc_v8hi" +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "") + +(define_expand "mulv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] + (mult:V16QI (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")))] "TARGET_ALTIVEC" " { - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); + rtx even 
= gen_reg_rtx (V8HImode); + rtx odd = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + int i; + + for (i = 0; i < 8; ++i) { + RTVEC_ELT (v, 2 * i) + = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 1 : 31 - 2 * i); + RTVEC_ELT (v, 2 * i + 1) + = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i); + } + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2])); + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask)); DONE; }") @@ -2358,11 +3875,36 @@ DONE; }") +;; Vector reverse elements +(define_expand "altivec_vreve<mode>2" + [(set (match_operand:VEC_A 0 "register_operand" "=v") + (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")] + UNSPEC_VREVEV))] + "TARGET_ALTIVEC" +{ + int i, j, size, num_elements; + rtvec v = rtvec_alloc (16); + rtx mask = gen_reg_rtx (V16QImode); + + size = GET_MODE_UNIT_SIZE (<MODE>mode); + num_elements = GET_MODE_NUNITS (<MODE>mode); + + for (j = 0; j < num_elements; j++) + for (i = 0; i < size; i++) + RTVEC_ELT (v, i + j * size) + = GEN_INT (i + (num_elements - 1 - j) * size); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], + operands[1], mask)); + DONE; +}) + ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL, ;; STVLX, STVLXL, STVRX, STVRXL, which are available only on Cell.
(define_insn "altivec_lvlx" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] UNSPEC_LVLX))] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "lvlx %0,%y1" @@ -2370,7 +3912,7 @@ (define_insn "altivec_lvlxl" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] UNSPEC_LVLXL))] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "lvlxl %0,%y1" @@ -2378,7 +3920,7 @@ (define_insn "altivec_lvrx" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] UNSPEC_LVRX))] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "lvrx %0,%y1" @@ -2386,7 +3928,7 @@ (define_insn "altivec_lvrxl" [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] UNSPEC_LVRXL))] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "lvrxl %0,%y1" @@ -2394,8 +3936,8 @@ (define_insn "altivec_stvlx" [(parallel - [(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVLX)])] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "stvlx %1,%y0" @@ -2403,8 +3945,8 @@ (define_insn "altivec_stvlxl" [(parallel - [(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVLXL)])] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "stvlxl %1,%y0" @@ -2412,8 +3954,8 @@ (define_insn "altivec_stvrx" [(parallel - 
[(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVRX)])] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "stvrx %1,%y0" @@ -2421,277 +3963,13 @@ (define_insn "altivec_stvrxl" [(parallel - [(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVRXL)])] "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" "stvrxl %1,%y0" [(set_attr "type" "vecstore")]) -(define_expand "vec_extract_evenv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - UNSPEC_EXTEVEN_V4SI))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4si 
(operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_expand "vec_extract_evenv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")] - UNSPEC_EXTEVEN_V4SF))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4sf (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_expand "vec_extract_evenv8hi" - [(set (match_operand:V4SI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - UNSPEC_EXTEVEN_V8HI))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - 
RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 20); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 21); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 28); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 29); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v8hi (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_expand "vec_extract_evenv16qi" - [(set (match_operand:V4SI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")] - UNSPEC_EXTEVEN_V16QI))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 18); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 20); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 22); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 26); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 28); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 30); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL 
(V16QImode, v))); - emit_insn (gen_altivec_vperm_v16qi (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_expand "vec_extract_oddv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - UNSPEC_EXTODD_V4SI))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 7); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 15); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 20); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 21); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 22); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 23); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 28); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 29); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 30); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 31); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4si (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_expand "vec_extract_oddv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")] - UNSPEC_EXTODD_V4SF))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 7); - 
RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 15); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 20); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 21); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 22); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 23); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 28); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 29); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 30); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 31); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4sf (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_insn "vpkuhum_nomode" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "register_operand" "v") - (match_operand 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "vpkuwum_nomode" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand 1 "register_operand" "v") - (match_operand 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_expand "vec_extract_oddv8hi" - [(set (match_operand:V8HI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - UNSPEC_EXTODD_V8HI))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_vpkuwum_nomode (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_extract_oddv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")] - UNSPEC_EXTODD_V16QI))] - "TARGET_ALTIVEC" - " -{ - emit_insn 
(gen_vpkuhum_nomode (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_high<mode>" - [(set (match_operand:VI 0 "register_operand" "") - (unspec:VI [(match_operand:VI 1 "register_operand" "") - (match_operand:VI 2 "register_operand" "")] - UNSPEC_INTERHI))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrgh<VI_char> (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_low<mode>" - [(set (match_operand:VI 0 "register_operand" "") - (unspec:VI [(match_operand:VI 1 "register_operand" "") - (match_operand:VI 2 "register_operand" "")] - UNSPEC_INTERLO))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrgl<VI_char> (operands[0], operands[1], operands[2])); - DONE; -}") - (define_expand "vec_unpacks_float_hi_v8hi" [(set (match_operand:V4SF 0 "register_operand" "") (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")] @@ -2747,3 +4025,514 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8/power9 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector absolute difference unsigned +(define_expand "vadu<mode>3" + [(set (match_operand:VI 0 "register_operand") + (unspec:VI [(match_operand:VI 1 "register_operand") + (match_operand:VI 2 "register_operand")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR") + +;; Vector absolute difference unsigned +(define_insn "*p9_vadu<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR" + "vabsdu<wd> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector count trailing zeros +(define_insn 
"*p9v_ctz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ctz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vctz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector parity +(define_insn "*p9v_parity<mode>2" + [(set (match_operand:VParity 0 "register_operand" "=v") + (parity:VParity (match_operand:VParity 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vprtyb<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + + +;; 128-bit binary integer arithmetic +;; We have a special container type (V1TImode) to allow operations using the +;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without +;; having to worry about the register allocator deciding GPRs are better. 
+ +(define_insn "altivec_vadduqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (plus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vadduqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VADDCUQ))] + "TARGET_VADDUQM" + "vaddcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (minus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vsubuqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VSUBCUQ))] + "TARGET_VADDUQM" + "vsubcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDEUQM))] + "TARGET_VADDUQM" + "vaddeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDECUQ))] + "TARGET_VADDUQM" + "vaddecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + 
+(define_insn "altivec_vsubeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBEUQM))] + "TARGET_VADDUQM" + "vsubeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBECUQ))] + "TARGET_VADDUQM" + "vsubecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; We use V2DI as the output type to simplify converting the permute +;; bits into an integer +(define_insn "altivec_vbpermq" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "type" "vecperm")]) + +; One of the vector API interfaces requires returning vector unsigned char. 
+(define_insn "altivec_vbpermq2" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vbpermd" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMD))] + "TARGET_P9_VECTOR" + "vbpermd %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Decimal Integer operations +(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) + +(define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") + (UNSPEC_BCDSUB "sub")]) + +(define_code_iterator BCD_TEST [eq lt gt unordered]) + +(define_insn "bcd<bcd_add_sub>" + [(set (match_operand:V1TI 0 "gpc_reg_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "gpc_reg_operand" "v") + (match_operand:V1TI 2 "gpc_reg_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "n")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP CR6_REGNO))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Use a floating point type (V2DFmode) for the compare to set CR6 so that we +;; can use the unordered test for BCD nans and add/subtracts that overflow. An +;; UNORDERED test on an integer type (like V1TImode) is not defined. The type +;; probably should be one that can go in the VMX (Altivec) registers, so we +;; can't use DDmode or DFmode. 
+(define_insn "*bcd<bcd_add_sub>_test" + [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j"))) + (clobber (match_scratch:V1TI 0 "=v"))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "*bcd<bcd_add_sub>_test2" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j")))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "darn_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_DARN_32))] + "TARGET_P9_MISC" + "darn %0,0" + [(set_attr "type" "integer")]) + +(define_insn "darn_raw" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,2" + [(set_attr "type" "integer")]) + +(define_insn "darn" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,1" + [(set_attr "type" "integer")]) + +;; Test byte within range. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. 
+;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Return in target register operand 0 a value of 1 if lo <= vv and +;; vv <= hi. Otherwise, set register operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmprb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; lo <= vv and vv <= hi. Otherwise, set the GT bit to 0. The other +;; 3 bits of the target CR register are all set to 0. +(define_insn "*cmprb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB))] + "TARGET_P9_MISC" + "cmprb %0,0,%1,%2" + [(set_attr "type" "logical")]) + +;; Set operand 0 register to -1 if the LT bit (0x8) of condition +;; register operand 1 is on. 
Otherwise, set operand 0 register to 1 +;; if the GT bit (0x4) of condition register operand 1 is on. +;; Otherwise, set operand 0 to 0. Note that the result stored into +;; register operand 0 is non-zero iff either the LT or GT bits are on +;; within condition register operand 1. +(define_insn "setb_signed" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +(define_insn "setb_unsigned" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gtu (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +;; Test byte within two ranges. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the ranges specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. +;; +;; Return in target register operand 0 a value of 1 if (lo_1 <= vv and +;; vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). Otherwise, set register +;; operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode.
+(define_expand "cmprb2" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the ranges specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; (lo_1 <= vv and vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). +;; Otherwise, set the GT bit to 0. The other 3 bits of the target +;; CR register are all set to 0. +(define_insn "*cmprb2_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2))] + "TARGET_P9_MISC" + "cmprb %0,1,%1,%2" + [(set_attr "type" "logical")]) + +;; Test byte membership within set of 8 bytes. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Return in target register operand 0 a value of 1 if vv equals one +;; of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, set +;; register operand 0 to 0. Note that the 8 byte values held within +;; operand 2 need not be unique. 
+;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation requires that operands +;; 0 and 1 have mode SI as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmpeqb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if vv +;; equals one of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, +;; set the GT bit to zero. The other 3 bits of the target CR register +;; are all set to 0. 
+(define_insn "*cmpeqb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB))] + "TARGET_P9_MISC && TARGET_64BIT" + "cmpeqb %0,%1,%2" + [(set_attr "type" "logical")]) + +(define_expand "bcd<bcd_add_sub>_<code>" + [(parallel [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB) + (match_dup 4))) + (clobber (match_scratch:V1TI 5 ""))]) + (set (match_operand:SI 0 "register_operand" "") + (BCD_TEST:SI (reg:CCFP CR6_REGNO) + (const_int 0)))] + "TARGET_P8_VECTOR" +{ + operands[4] = CONST0_RTX (V2DFmode); +}) + +;; Peephole2 pattern to combine a bcdadd/bcdsub that calculates the value and +;; the bcdadd/bcdsub that tests the value. The combiner won't work since +;; CR6 is a hard coded register. Unfortunately, all of the Altivec predicate +;; support is hard coded to use the fixed register CR6 instead of creating +;; a register class for CR6. + +(define_peephole2 + [(parallel [(set (match_operand:V1TI 0 "register_operand" "") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP CR6_REGNO))]) + (parallel [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" ""))) + (clobber (match_operand:V1TI 5 "register_operand" ""))])] + "TARGET_P8_VECTOR" + [(parallel [(set (match_dup 0) + (unspec:V1TI [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_dup 4)))])])