Mercurial > hg > CbC > CbC_gcc
diff gcc/config/ia64/vect.md @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | 77e2b8dfacca |
children | 04ced10e8804 |
line wrap: on
line diff
--- a/gcc/config/ia64/vect.md Tue May 25 18:58:51 2010 +0900 +++ b/gcc/config/ia64/vect.md Tue Mar 22 17:18:12 2011 +0900 @@ -1,5 +1,5 @@ ;; IA-64 machine description for vector operations. -;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. +;; Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc. ;; ;; This file is part of GCC. ;; @@ -24,6 +24,7 @@ (define_mode_iterator VECINT12 [V8QI V4HI]) (define_mode_iterator VECINT24 [V4HI V2SI]) (define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")]) +(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")]) (define_expand "mov<mode>" [(set (match_operand:VECINT 0 "general_operand" "") @@ -171,38 +172,73 @@ (match_operand:V8QI 2 "gr_register_operand" "r")))] "" { - rtx r1, l1, r2, l2, rm, lm; - - r1 = gen_reg_rtx (V4HImode); - l1 = gen_reg_rtx (V4HImode); - r2 = gen_reg_rtx (V4HImode); - l2 = gen_reg_rtx (V4HImode); - - /* Zero-extend the QImode elements into two words of HImode elements - by interleaving them with zero bytes. */ - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2), - operands[2], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2), - operands[2], CONST0_RTX (V8QImode))); - - /* Multiply. */ - rm = gen_reg_rtx (V4HImode); - lm = gen_reg_rtx (V4HImode); - emit_insn (gen_mulv4hi3 (rm, r1, r2)); - emit_insn (gen_mulv4hi3 (lm, l1, l2)); - - /* Zap the high order bytes of the HImode elements by overwriting those - in one part with the low order bytes of the other. */ - emit_insn (gen_mix1_r (operands[0], - gen_lowpart (V8QImode, rm), - gen_lowpart (V8QImode, lm))); + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2])); + emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2])); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l)); + else + emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h)); DONE; }) +(define_expand "vec_widen_umult_lo_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacku_lo_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacku_lo_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_umult_hi_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacku_hi_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacku_hi_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_smult_lo_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacks_lo_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacks_lo_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_smult_hi_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacks_hi_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacks_hi_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + (define_insn "mulv4hi3" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r") @@ -211,7 +247,35 @@ "pmpyshr2 %0 = %1, %2, 0" [(set_attr "itanium_class" "mmmul")]) -(define_insn "pmpy2_r" +(define_insn "pmpyshr2" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (truncate:V4HI + (ashiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (sign_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r"))) + (match_operand:SI 3 "pmpyshr_operand" "n"))))] + "" + "pmpyshr2 %0 = %1, %2, %3" + [(set_attr "itanium_class" "mmmul")]) + +(define_insn "pmpyshr2_u" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI + (zero_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (zero_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r"))) + (match_operand:SI 3 "pmpyshr_operand" "n"))))] + "" + "pmpyshr2.u %0 = %1, %2, %3" + [(set_attr "itanium_class" "mmmul")]) + +(define_insn "pmpy2_even" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (mult:V2SI (vec_select:V2SI @@ -223,10 +287,16 @@ (match_operand:V4HI 2 "gr_register_operand" "r")) (parallel [(const_int 0) (const_int 2)]))))] "" - "pmpy2.r %0 = %1, %2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pmpy2.l %0 = %1, %2"; + else + return "%,pmpy2.r %0 = %1, %2"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "pmpy2_l" +(define_insn "pmpy2_odd" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (mult:V2SI (vec_select:V2SI @@ -238,9 +308,138 @@ (match_operand:V4HI 2 "gr_register_operand" "r")) (parallel [(const_int 1) (const_int 3)]))))] "" - "pmpy2.l %0 = %1, %2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pmpy2.r %0 = %1, %2"; + else + return "%,pmpy2.l %0 = %1, %2"; +} [(set_attr "itanium_class" "mmshf")]) +(define_expand "vec_widen_smult_lo_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, false); + DONE; +}) + +(define_expand "vec_widen_smult_hi_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, true); + DONE; +}) + +(define_expand "vec_widen_umult_lo_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, false); + DONE; +}) + +(define_expand "vec_widen_umult_hi_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, true); + DONE; +}) + +(define_expand "mulv2si3" + [(set (match_operand:V2SI 0 "gr_register_operand" "") + (mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r") + (match_operand:V2SI 2 "gr_register_operand" "r")))] + "" +{ + rtx t0, t1, t2, t3, t4, t5, t6, t7, x; + rtx op1h = gen_lowpart (V4HImode, operands[1]); + rtx op2h = gen_lowpart (V4HImode, operands[2]); + + t0 = gen_reg_rtx (V4HImode); + t1 = gen_reg_rtx (V4HImode); + t2 = gen_reg_rtx (V4HImode); + t3 = gen_reg_rtx (V4HImode); + t4 = gen_reg_rtx (V2SImode); + t5 = gen_reg_rtx (V2SImode); + t6 = gen_reg_rtx (V2SImode); + t7 = gen_reg_rtx (V2SImode); + + /* Consider the HImode components of op1 = DCBA, op2 = ZYXW. + Consider .l and .h suffixes below the low and high 16 bits + of the full 32-bit product. */ + + /* T0 = CDBA. */ + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx, + GEN_INT (3), const2_rtx)); + x = gen_rtx_VEC_SELECT (V4HImode, op1h, x); + emit_insn (gen_rtx_SET (VOIDmode, t0, x)); + + /* T1 = DZ.l, CY.l, BX.l, AW.l. */ + emit_insn (gen_mulv4hi3 (t1, op1h, op2h)); + + /* T2 = DZ.h, CY.h, BX.h, AW.h. */ + emit_insn (gen_pmpyshr2_u (t2, op1h, op2h, GEN_INT (16))); + + /* T3 = CZ.l, DY.l, AX.l, BW.l. */ + emit_insn (gen_mulv4hi3 (t3, t0, op2h)); + + /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */ + x = gen_lowpart (V4HImode, t4); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_odd (x, t2, t1); + else + x = gen_mix2_even (x, t1, t2); + emit_insn (x); + + /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */ + x = gen_lowpart (V4HImode, t5); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode)); + else + x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3); + emit_insn (x); + + /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */ + x = gen_lowpart (V4HImode, t6); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode)); + else + x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3); + emit_insn (x); + + emit_insn (gen_addv2si3 (t7, t4, t5)); + emit_insn (gen_addv2si3 (operands[0], t6, t7)); + DONE; +}) + (define_expand "umax<mode>3" [(set (match_operand:VECINT 0 "gr_register_operand" "") (umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "") @@ -429,16 +628,36 @@ (match_operand:V2SI 3 "gr_register_operand" "")] "" { - rtx l, r, t; + rtx e, o, t; - r = gen_reg_rtx (V2SImode); - l = gen_reg_rtx (V2SImode); + e = gen_reg_rtx (V2SImode); + o = gen_reg_rtx (V2SImode); t = gen_reg_rtx (V2SImode); - emit_insn (gen_pmpy2_r (r, operands[1], operands[2])); - emit_insn (gen_pmpy2_l (l, operands[1], operands[2])); - emit_insn (gen_addv2si3 (t, r, operands[3])); - emit_insn (gen_addv2si3 (operands[0], t, l)); + emit_insn (gen_pmpy2_even (e, operands[1], operands[2])); + emit_insn (gen_pmpy2_odd (o, operands[1], operands[2])); + emit_insn (gen_addv2si3 (t, e, operands[3])); + emit_insn (gen_addv2si3 (operands[0], t, o)); + DONE; +}) + +(define_expand "udot_prodv4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "") + (match_operand:V2SI 3 "gr_register_operand" "")] + "" +{ + rtx l, h, t; + + l = gen_reg_rtx (V2SImode); + h = gen_reg_rtx (V2SImode); + t = gen_reg_rtx (V2SImode); + + emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2])); + emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2])); + emit_insn (gen_addv2si3 (t, l, operands[3])); + emit_insn (gen_addv2si3 (operands[0], t, h)); DONE; }) @@ -486,7 +705,7 @@ "pcmp<vecsize>.gt %0 = %r1, %r2" [(set_attr "itanium_class" "mmalua")]) -(define_insn "pack2_sss" +(define_insn "vec_pack_ssat_v4hi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_concat:V8QI (ss_truncate:V4QI @@ -494,10 +713,16 @@ (ss_truncate:V4QI (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack2.sss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pack2.sss %0 = %r2, %r1"; + else + return "%,pack2.sss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "*pack2_uss" +(define_insn "vec_pack_usat_v4hi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_concat:V8QI (us_truncate:V4QI @@ -505,10 +730,16 @@ (us_truncate:V4QI (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack2.uss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pack2.uss %0 = %r2, %r1"; + else + return "%,pack2.uss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "pack4_sss" +(define_insn "vec_pack_ssat_v2si" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_concat:V4HI (ss_truncate:V2HI @@ -516,93 +747,103 @@ (ss_truncate:V2HI (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack4.sss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pack4.sss %0 = %r2, %r1"; + else + return "%,pack4.sss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack1_l" +(define_insn "vec_interleave_lowv8qi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3) - (const_int 8) - (const_int 9) - (const_int 10) - (const_int 11)])))] + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] "" - "unpack1.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack1.l %0 = %r1, %r2"; + else + return "%,unpack1.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack1_h" +(define_insn "vec_interleave_highv8qi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 4) - (const_int 5) - (const_int 6) - (const_int 7) - (const_int 12) - (const_int 13) - (const_int 14) - (const_int 15)])))] + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] "" - "unpack1.h %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack1.h %0 = %r1, %r2"; + else + return "%,unpack1.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix1_r" +(define_insn "mix1_even" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 8) - (const_int 2) - (const_int 10) - (const_int 4) - (const_int 12) - (const_int 6) - (const_int 14)])))] + (parallel [(const_int 0) (const_int 8) + (const_int 2) (const_int 10) + (const_int 4) (const_int 12) + (const_int 6) (const_int 14)])))] "" - "mix1.r %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix1.l %0 = %r1, %r2"; + else + return "%,mix1.r %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix1_l" +(define_insn "mix1_odd" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 9) - (const_int 3) - (const_int 11) - (const_int 5) - (const_int 13) - (const_int 7) - (const_int 15)])))] + (parallel [(const_int 1) (const_int 9) + (const_int 3) (const_int 11) + (const_int 5) (const_int 13) + (const_int 7) (const_int 15)])))] "" - "mix1.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix1.r %0 = %r1, %r2"; + else + return "%,mix1.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "*mux1_rev" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 7) - (const_int 6) - (const_int 5) - (const_int 4) - (const_int 3) - (const_int 2) - (const_int 1) - (const_int 0)])))] + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "" "mux1 %0 = %1, @rev" [(set_attr "itanium_class" "mmshf")]) @@ -611,14 +852,10 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 4) - (const_int 2) - (const_int 6) - (const_int 1) - (const_int 5) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6) + (const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] "" "mux1 %0 = %1, @mix" [(set_attr "itanium_class" "mmshf")]) @@ -627,30 +864,22 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "" "mux1 %0 = %1, @shuf" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mux1_alt" +(define_insn "mux1_alt" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))] "" "mux1 %0 = %1, @alt" [(set_attr "itanium_class" "mmshf")]) @@ -659,14 +888,14 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0)])))] + (parallel [(match_operand 2 "mux1_brcst_element" "") + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2)])))] "" "mux1 %0 = %1, @brcst" [(set_attr "itanium_class" "mmshf")]) @@ -679,60 +908,100 @@ "mux1 %0 = %1, @brcst" [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack2_l" - [(set (match_operand:V4HI 0 "gr_register_operand" "=r") - (vec_select:V4HI - (vec_concat:V8HI - (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") - (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5)])))] +(define_expand "vec_extract_evenv8qi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] "" - "unpack2.l %0 = %r2, %r1" - [(set_attr "itanium_class" "mmshf")]) +{ + rtx temp = gen_reg_rtx (V8QImode); + emit_insn (gen_mix1_even (temp, operands[1], operands[2])); + emit_insn (gen_mux1_alt (operands[0], temp)); + DONE; +}) -(define_insn "unpack2_h" +(define_expand "vec_extract_oddv8qi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx temp = gen_reg_rtx (V8QImode); + emit_insn (gen_mix1_odd (temp, operands[1], operands[2])); + emit_insn (gen_mux1_alt (operands[0], temp)); + DONE; +}) + +(define_insn "vec_interleave_lowv4hi" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "" - "unpack2.h %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack2.l %0 = %r1, %r2"; + else + return "%,unpack2.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mix2_r" +(define_insn "vec_interleave_highv4hi" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 4) - (const_int 2) - (const_int 6)])))] + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "" - "mix2.r %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack2.h %0 = %r1, %r2"; + else + return "%,unpack2.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mix2_l" +(define_insn "mix2_even" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 5) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] "" - "mix2.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix2.l %0 = %r1, %r2"; + else + return "%,mix2.r %0 = %r2, %r1"; +} + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "mix2_odd" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix2.r %0 = %r1, %r2"; + else + return "%,mix2.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "*mux2" @@ -745,16 +1014,58 @@ (match_operand 5 "const_int_2bit_operand" "")])))] "" { - int mask; - mask = INTVAL (operands[2]); - mask |= INTVAL (operands[3]) << 2; - mask |= INTVAL (operands[4]) << 4; - mask |= INTVAL (operands[5]) << 6; + int mask = 0; + if (TARGET_BIG_ENDIAN) + { + mask |= (3 - INTVAL (operands[2])) << 6; + mask |= (3 - INTVAL (operands[3])) << 4; + mask |= (3 - INTVAL (operands[4])) << 2; + mask |= 3 - INTVAL (operands[5]); + } + else + { + mask |= INTVAL (operands[2]); + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + } operands[2] = GEN_INT (mask); return "%,mux2 %0 = %1, %2"; } [(set_attr "itanium_class" "mmshf")]) +(define_expand "vec_extract_evenodd_helper" + [(set (match_operand:V4HI 0 "gr_register_operand" "") + (vec_select:V4HI + (match_operand:V4HI 1 "gr_register_operand" "") + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])))] + "") + +(define_expand "vec_extract_evenv4hi" + [(match_operand:V4HI 0 "gr_register_operand") + (match_operand:V4HI 1 "gr_reg_or_0_operand") + (match_operand:V4HI 2 "gr_reg_or_0_operand")] + "" +{ + rtx temp = gen_reg_rtx (V4HImode); + emit_insn (gen_mix2_even (temp, operands[1], operands[2])); + emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); + DONE; +}) + +(define_expand "vec_extract_oddv4hi" + [(match_operand:V4HI 0 "gr_register_operand") + (match_operand:V4HI 1 "gr_reg_or_0_operand") + (match_operand:V4HI 2 "gr_reg_or_0_operand")] + "" +{ + rtx temp = gen_reg_rtx (V4HImode); + emit_insn (gen_mix2_odd (temp, operands[1], operands[2])); + emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); + DONE; +}) + (define_insn "*mux2_brcst_hi" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_duplicate:V4HI @@ -763,31 +1074,69 @@ "mux2 %0 = %1, 0" [(set_attr "itanium_class" "mmshf")]) -;; Note that mix4.r performs the exact same operation. -(define_insn "*unpack4_l" +(define_insn "vec_interleave_lowv2si" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) (const_int 2)])))] + "" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.l %0 = %r1, %r2"; + else + return "%,unpack4.l %0 = %r2, %r1"; +} + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "vec_interleave_highv2si" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (vec_select:V2SI (vec_concat:V4SI (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 2)])))] + (parallel [(const_int 1) (const_int 3)])))] "" - "unpack4.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.h %0 = %r1, %r2"; + else + return "%,unpack4.h %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -;; Note that mix4.l performs the exact same operation. -(define_insn "*unpack4_h" - [(set (match_operand:V2SI 0 "gr_register_operand" "=r") - (vec_select:V2SI - (vec_concat:V4SI - (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") - (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 3)])))] +(define_expand "vec_extract_evenv2si" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] "" - "unpack4.h %0 = %r2, %r1" - [(set_attr "itanium_class" "mmshf")]) +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "vec_extract_oddv2si" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], + operands[2])); + DONE; +}) (define_expand "vec_initv2si" [(match_operand:V2SI 0 "gr_register_operand" "") @@ -810,10 +1159,7 @@ if (!gr_reg_or_0_operand (op2, SImode)) op2 = force_reg (SImode, op2); - if (TARGET_BIG_ENDIAN) - x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1); - else - x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2); + x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2); emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); DONE; }) @@ -824,14 +1170,19 @@ (match_operand:SI 1 "gr_reg_or_0_operand" "rO") (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))] "" - "unpack4.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.l %0 = %r1, %r2"; + else + return "%,unpack4.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) ;; Missing operations ;; padd.uus ;; pavg ;; pavgsub -;; pmpyshr, general form ;; psad ;; pshladd ;; pshradd @@ -903,106 +1254,29 @@ "fpnegabs %0 = %1" [(set_attr "itanium_class" "fmisc")]) -;; In order to convince combine to merge plus and mult to a useful fpma, -;; we need a couple of extra patterns. (define_expand "addv2sf3" - [(parallel - [(set (match_operand:V2SF 0 "fr_register_operand" "") - (plus:V2SF (match_operand:V2SF 1 "fr_register_operand" "") - (match_operand:V2SF 2 "fr_register_operand" ""))) - (use (match_dup 3))])] + [(set (match_operand:V2SF 0 "fr_register_operand" "") + (fma:V2SF (match_operand:V2SF 1 "fr_register_operand" "") + (match_dup 3) + (match_operand:V2SF 2 "fr_register_operand" "")))] "" { rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode)); operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v)); - if (!TARGET_FUSED_MADD) - { - emit_insn (gen_fpma (operands[0], operands[1], operands[3], operands[2])); - DONE; - } }) -;; The split condition here could be combine_completed, if we had such. -(define_insn_and_split "*addv2sf3_1" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (plus:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f"))) - (use (match_operand:V2SF 3 "fr_register_operand" "f"))] - "" - "#" - "reload_completed" - [(set (match_dup 0) - (plus:V2SF - (mult:V2SF (match_dup 1) (match_dup 3)) - (match_dup 2)))] - "") - -(define_insn_and_split "*addv2sf3_2" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (plus:V2SF - (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f")) - (match_operand:V2SF 3 "fr_register_operand" "f"))) - (use (match_operand:V2SF 4 "" "X"))] - "" - "#" - "" - [(set (match_dup 0) - (plus:V2SF - (mult:V2SF (match_dup 1) (match_dup 2)) - (match_dup 3)))] - "") - -;; In order to convince combine to merge minus and mult to a useful fpms, -;; we need a couple of extra patterns. (define_expand "subv2sf3" - [(parallel - [(set (match_operand:V2SF 0 "fr_register_operand" "") - (minus:V2SF (match_operand:V2SF 1 "fr_register_operand" "") - (match_operand:V2SF 2 "fr_register_operand" ""))) - (use (match_dup 3))])] + [(set (match_operand:V2SF 0 "fr_register_operand" "") + (fma:V2SF + (match_operand:V2SF 1 "fr_register_operand" "") + (match_dup 3) + (neg:V2SF (match_operand:V2SF 2 "fr_register_operand" ""))))] "" { rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode)); operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v)); - if (!TARGET_FUSED_MADD) - { - emit_insn (gen_fpms (operands[0], operands[1], operands[3], operands[2])); - DONE; - } }) -;; The split condition here could be combine_completed, if we had such. -(define_insn_and_split "*subv2sf3_1" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (minus:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f"))) - (use (match_operand:V2SF 3 "fr_register_operand" "f"))] - "" - "#" - "reload_completed" - [(set (match_dup 0) - (minus:V2SF - (mult:V2SF (match_dup 1) (match_dup 3)) - (match_dup 2)))] - "") - -(define_insn_and_split "*subv2sf3_2" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (minus:V2SF - (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f")) - (match_operand:V2SF 3 "fr_register_operand" "f"))) - (use (match_operand:V2SF 4 "" "X"))] - "" - "#" - "" - [(set (match_dup 0) - (minus:V2SF - (mult:V2SF (match_dup 1) (match_dup 2)) - (match_dup 3)))] - "") - (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") @@ -1011,22 +1285,22 @@ "fpmpy %0 = %1, %2" [(set_attr "itanium_class" "fmac")]) -(define_insn "fpma" +(define_insn "fmav2sf4" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (plus:V2SF - (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f")) + (fma:V2SF + (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f") (match_operand:V2SF 3 "fr_register_operand" "f")))] "" "fpma %0 = %1, %2, %3" [(set_attr "itanium_class" "fmac")]) -(define_insn "fpms" +(define_insn "fmsv2sf4" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (minus:V2SF - (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f")) - (match_operand:V2SF 3 "fr_register_operand" "f")))] + (fma:V2SF + (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f") + (neg:V2SF (match_operand:V2SF 3 "fr_register_operand" "f"))))] "" "fpms %0 = %1, %2, %3" [(set_attr "itanium_class" "fmac")]) @@ -1040,12 +1314,11 @@ "fpnmpy %0 = %1, %2" [(set_attr "itanium_class" "fmac")]) -(define_insn "*fpnma" +(define_insn "fnmav2sf4" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (plus:V2SF - (neg:V2SF - (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") - (match_operand:V2SF 2 "fr_register_operand" "f"))) + (fma:V2SF + (neg:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")) + (match_operand:V2SF 2 "fr_register_operand" "f") (match_operand:V2SF 3 "fr_register_operand" "f")))] "" "fpnma %0 = %1, %2, %3" @@ -1073,7 +1346,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1084,7 +1360,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1095,7 +1374,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1161,10 +1443,7 @@ if (!fr_reg_or_fp01_operand (op2, SFmode)) op2 = force_reg (SFmode, op2); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_fpack (operands[0], op2, op1)); - else - emit_insn (gen_fpack (operands[0], op1, op2)); + emit_insn (gen_fpack (operands[0], op1, op2)); DONE; }) @@ -1174,7 +1453,13 @@ (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))] "" - "fpack %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fpack %0 = %F1, %F2"; + else + return "%,fpack %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "fswap" @@ -1185,10 +1470,16 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 1) (const_int 2)])))] "" - "fswap %0 = %F1, %F2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fswap %0 = %F2, %F1"; + else + return "%,fswap %0 = %F1, %F2"; +} [(set_attr "itanium_class" "fmisc")]) -(define_insn "*fmix_l" +(define_insn "vec_interleave_highv2sf" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (vec_select:V2SF (vec_concat:V4SF @@ -1196,10 +1487,16 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 1) (const_int 3)])))] "" - "fmix.l %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fmix.l %0 = %F1, %F2"; + else + return "%,fmix.l %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) -(define_insn "fmix_r" +(define_insn "vec_interleave_lowv2sf" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (vec_select:V2SF (vec_concat:V4SF @@ -1207,7 +1504,13 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 0) (const_int 2)])))] "" - "fmix.r %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fmix.r %0 = %F1, %F2"; + else + return "%,fmix.r %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "fmix_lr" @@ -1218,25 +1521,63 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 0) (const_int 3)])))] "" - "fmix.lr %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fmix.lr %0 = %F1, %F2"; + else + return "%,fmix.lr %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) +(define_expand "vec_extract_evenv2sf" + [(match_operand:V2SF 0 "gr_register_operand" "") + (match_operand:V2SF 1 "gr_register_operand" "") + (match_operand:V2SF 2 "gr_register_operand" "")] + "" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "vec_extract_oddv2sf" + [(match_operand:V2SF 0 "gr_register_operand" "") + (match_operand:V2SF 1 "gr_register_operand" "") + (match_operand:V2SF 2 "gr_register_operand" "")] + "" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1], + operands[2])); + DONE; +}) + (define_expand "vec_setv2sf" [(match_operand:V2SF 0 "fr_register_operand" "") (match_operand:SF 1 "fr_register_operand" "") (match_operand 2 "const_int_operand" "")] "" { + rtx op0 = operands[0]; rtx tmp = gen_reg_rtx (V2SFmode); + emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode))); switch (INTVAL (operands[2])) { case 0: - emit_insn (gen_fmix_lr (operands[0], tmp, operands[0])); + emit_insn (gen_fmix_lr (op0, tmp, op0)); break; case 1: - emit_insn (gen_fmix_r (operands[0], operands[0], tmp)); + emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp)); break; default: gcc_unreachable (); @@ -1263,8 +1604,8 @@ }) (define_insn_and_split "*vec_extractv2sf_0_be" - [(set (match_operand:SF 0 "register_operand" "=r,f") - (unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r") + [(set (match_operand:SF 0 "register_operand" "=rf,r") + (unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r") (const_int 0)] UNSPEC_VECT_EXTR))] "TARGET_BIG_ENDIAN" @@ -1272,31 +1613,44 @@ "reload_completed" [(set (match_dup 0) (match_dup 1))] { - if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1]))) - operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0])); + if (MEM_P (operands[1])) + operands[1] = adjust_address (operands[1], SFmode, 0); else - operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); + { + emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); + DONE; + } }) -(define_insn_and_split "*vec_extractv2sf_1" +(define_insn_and_split "*vec_extractv2sf_1_le" [(set (match_operand:SF 0 "register_operand" "=r") (unspec:SF [(match_operand:V2SF 1 "register_operand" "r") (const_int 1)] UNSPEC_VECT_EXTR))] - "" + "!TARGET_BIG_ENDIAN" "#" - "reload_completed" + "&& reload_completed" [(const_int 0)] { operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); operands[1] = gen_rtx_REG (DImode, REGNO (operands[1])); - if (TARGET_BIG_ENDIAN) - emit_move_insn (operands[0], operands[1]); - else - emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); + emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); DONE; }) +(define_insn_and_split "*vec_extractv2sf_1_be" + [(set (match_operand:SF 0 "register_operand" "=rf") + (unspec:SF [(match_operand:V2SF 1 "register_operand" "r") + (const_int 1)] + UNSPEC_VECT_EXTR))] + "TARGET_BIG_ENDIAN" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); +}) + (define_expand "vec_extractv2sf" [(set (match_operand:SF 0 "register_operand" "") (unspec:SF [(match_operand:V2SF 1 "register_operand" "") @@ -1305,6 +1659,72 @@ "" "") +(define_expand "vec_unpacku_lo_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacku_hi_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, true, true); + DONE; +}) + +(define_expand "vec_unpacks_lo_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, false, false); + DONE; +}) + +(define_expand "vec_unpacks_hi_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_pack_trunc_v4hi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_lowpart (V8QImode, operands[1]); + rtx op2 = gen_lowpart (V8QImode, operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2)); + else + emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2)); + DONE; +}) + +(define_expand "vec_pack_trunc_v2si" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_lowpart (V4HImode, operands[1]); + rtx op2 = gen_lowpart (V4HImode, operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2)); + else + emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2)); + DONE; +}) + ;; Missing operations ;; fprcpa ;; fpsqrta