Mercurial > hg > CbC > CbC_gcc
diff gcc/config/rs6000/vector.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/config/rs6000/vector.md Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/config/rs6000/vector.md Fri Oct 27 22:46:09 2017 +0900 @@ -3,8 +3,7 @@ ;; expander, and the actual vector instructions will be in altivec.md and ;; vsx.md -;; Copyright (C) 2009, 2010, 2011 -;; Free Software Foundation, Inc. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> ;; This file is part of GCC. @@ -25,28 +24,36 @@ ;; Vector int modes -(define_mode_iterator VEC_I [V16QI V8HI V4SI]) +(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) + +;; Vector int modes for parity +(define_mode_iterator VEC_IP [V8HI + V4SI + V2DI + V1TI + TI]) ;; Vector float modes (define_mode_iterator VEC_F [V4SF V2DF]) ;; Vector arithmetic modes -(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector modes that need alginment via permutes (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) ;; Vector logical modes -(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF]) -;; Vector modes for moves. Don't do TImode here. -(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +;; Vector modes for moves. Don't do TImode or TFmode here, since their +;; moves are handled elsewhere. +(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF]) ;; Vector modes for types that don't need a realignment under VSX -(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF]) +(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) ;; Vector comparison modes -(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector init/extract modes (define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -54,8 +61,8 @@ ;; Vector modes for 64-bit base types (define_mode_iterator VEC_64 [V2DI V2DF]) -;; Vector reload iterator -(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI]) +;; Vector integer modes +(define_mode_iterator VI [V4SI V8HI V16QI]) ;; Base type from vector mode (define_mode_attr VEC_base [(V16QI "QI") @@ -64,8 +71,19 @@ (V2DI "DI") (V4SF "SF") (V2DF "DF") + (V1TI "TI") (TI "TI")]) +;; As above, but in lower case +(define_mode_attr VEC_base_l [(V16QI "qi") + (V8HI "hi") + (V4SI "si") + (V2DI "di") + (V4SF "sf") + (V2DF "df") + (V1TI "ti") + (TI "ti")]) + ;; Same size integer type for floating point data (define_mode_attr VEC_int [(V4SF "v4si") (V2DF "v2di")]) @@ -74,11 +92,24 @@ (V2DF "V2DI")]) ;; constants for unspec -(define_constants - [(UNSPEC_PREDICATE 400)]) +(define_c_enum "unspec" [UNSPEC_PREDICATE + UNSPEC_REDUC + UNSPEC_NEZ_P]) + +;; Vector reduction code iterators +(define_code_iterator VEC_reduc [plus smin smax]) + +(define_code_attr VEC_reduc_name [(plus "plus") + (smin "smin") + (smax "smax")]) + +(define_code_attr VEC_reduc_rtx [(plus "add") + (smin "smin") + (smax "smax")]) -;; Vector move instructions. +;; Vector move instructions. Little-endian VSX loads and stores require +;; special handling to circumvent "element endianness." (define_expand "mov<mode>" [(set (match_operand:VEC_M 0 "nonimmediate_operand" "") (match_operand:VEC_M 1 "any_operand" ""))] @@ -86,14 +117,31 @@ { if (can_create_pseudo_p ()) { - if (CONSTANT_P (operands[1]) - && !easy_vector_constant (operands[1], <MODE>mode)) - operands[1] = force_const_mem (<MODE>mode, operands[1]); + if (CONSTANT_P (operands[1])) + { + if (FLOAT128_VECTOR_P (<MODE>mode)) + { + if (!easy_fp_constant (operands[1], <MODE>mode)) + operands[1] = force_const_mem (<MODE>mode, operands[1]); + } + else if (!easy_vector_constant (operands[1], <MODE>mode)) + operands[1] = force_const_mem (<MODE>mode, operands[1]); + } - else if (!vlogical_operand (operands[0], <MODE>mode) - && !vlogical_operand (operands[1], <MODE>mode)) + if (!vlogical_operand (operands[0], <MODE>mode) + && !vlogical_operand (operands[1], <MODE>mode)) operands[1] = force_reg (<MODE>mode, operands[1]); } + if (!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (<MODE>mode) + && !TARGET_P9_VECTOR + && !gpr_or_gpr_p (operands[0], operands[1]) + && (memory_operand (operands[0], <MODE>mode) + ^ memory_operand (operands[1], <MODE>mode))) + { + rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); + DONE; + } }) ;; Generic vector floating point load/store instructions. These will match @@ -116,7 +164,9 @@ (match_operand:VEC_L 1 "input_operand" ""))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); @@ -138,7 +188,14 @@ if (VECTOR_MEM_VSX_P (<MODE>mode)) { operands[1] = rs6000_address_for_altivec (operands[1]); - emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1])); + rtx and_op = XEXP (operands[1], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_lvx_<mode>_2op (operands[0], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_lvx_<mode>_1op (operands[0], operands[1])); DONE; } }") @@ -154,72 +211,20 @@ if (VECTOR_MEM_VSX_P (<MODE>mode)) { operands[0] = rs6000_address_for_altivec (operands[0]); - emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1])); + rtx and_op = XEXP (operands[0], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_stvx_<mode>_2op (operands[1], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_stvx_<mode>_1op (operands[1], operands[0])); DONE; } }") -;; Reload patterns for vector operations. We may need an addtional base -;; register to convert the reg+offset addressing to reg+reg for vector -;; registers and reg+reg or (reg+reg)&(-16) addressing to just an index -;; register for gpr registers. -(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_store" - [(parallel [(match_operand:VEC_R 0 "memory_operand" "m") - (match_operand:VEC_R 1 "gpc_reg_operand" "r") - (match_operand:P 2 "register_operand" "=&b")])] - "<P:tptrsize>" -{ - rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true); - DONE; -}) - -(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_load" - [(parallel [(match_operand:VEC_R 0 "gpc_reg_operand" "=&r") - (match_operand:VEC_R 1 "memory_operand" "m") - (match_operand:P 2 "register_operand" "=&b")])] - "<P:tptrsize>" -{ - rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false); - DONE; -}) - -;; Reload sometimes tries to move the address to a GPR, and can generate -;; invalid RTL for addresses involving AND -16. Allow addresses involving -;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16. - -(define_insn_and_split "*vec_reload_and_plus_<mptrsize>" - [(set (match_operand:P 0 "gpc_reg_operand" "=b") - (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") - (match_operand:P 2 "reg_or_cint_operand" "rI")) - (const_int -16)))] - "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" - "#" - "&& reload_completed" - [(set (match_dup 0) - (plus:P (match_dup 1) - (match_dup 2))) - (parallel [(set (match_dup 0) - (and:P (match_dup 0) - (const_int -16))) - (clobber:CC (scratch:CC))])]) - -;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16 -;; address to a register because there is no clobber of a (scratch), so we add -;; it here. -(define_insn_and_split "*vec_reload_and_reg_<mptrsize>" - [(set (match_operand:P 0 "gpc_reg_operand" "=b") - (and:P (match_operand:P 1 "gpc_reg_operand" "r") - (const_int -16)))] - "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" - "#" - "&& reload_completed" - [(parallel [(set (match_dup 0) - (and:P (match_dup 1) - (const_int -16))) - (clobber:CC (scratch:CC))])]) - ;; Generic floating point vector arithmetic support (define_expand "add<mode>3" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -239,7 +244,7 @@ [(set (match_operand:VEC_F 0 "vfloat_operand" "") (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] - "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) { @@ -253,7 +258,15 @@ (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" "")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "") +{ + if (RS6000_RECIP_AUTO_RE_P (<MODE>mode) + && can_create_pseudo_p () && flag_finite_math_only + && !flag_trapping_math && flag_reciprocal_math) + { + rs6000_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; + } +}) (define_expand "neg<mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -300,7 +313,16 @@ [(set (match_operand:VEC_F 0 "vfloat_operand" "") (sqrt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "") +{ + if (<MODE>mode == V4SFmode + && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + rs6000_emit_swsqrt (operands[0], operands[1], 0); + DONE; + } +}) (define_expand "rsqrte<mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -359,7 +381,7 @@ ;; Vector comparisons -(define_expand "vcond<mode>" +(define_expand "vcond<mode><mode>" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (if_then_else:VEC_F (match_operator 3 "comparison_operator" @@ -377,15 +399,34 @@ FAIL; }") -(define_expand "vcond<mode>" - [(set (match_operand:VEC_I 0 "vint_operand" "") +(define_expand "vcond<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") (if_then_else:VEC_I (match_operator 3 "comparison_operator" - [(match_operand:VEC_I 4 "vint_operand" "") - (match_operand:VEC_I 5 "vint_operand" "")]) - (match_operand:VEC_I 1 "vint_operand" "") - (match_operand:VEC_I 2 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + [(match_operand:VEC_I 4 "vint_operand") + (match_operand:VEC_I 5 "vint_operand")]) + (match_operand:VEC_I 1 "vector_int_reg_or_same_bit") + (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondv4sfv4si" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (if_then_else:V4SF + (match_operator 3 "comparison_operator" + [(match_operand:V4SI 4 "vint_operand" "") + (match_operand:V4SI 5 "vint_operand" "")]) + (match_operand:V4SF 1 "vfloat_operand" "") + (match_operand:V4SF 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" " { if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], @@ -395,15 +436,53 @@ FAIL; }") -(define_expand "vcondu<mode>" - [(set (match_operand:VEC_I 0 "vint_operand" "") +(define_expand "vcondv4siv4sf" + [(set (match_operand:V4SI 0 "vint_operand" "") + (if_then_else:V4SI + (match_operator 3 "comparison_operator" + [(match_operand:V4SF 4 "vfloat_operand" "") + (match_operand:V4SF 5 "vfloat_operand" "")]) + (match_operand:V4SI 1 "vint_operand" "") + (match_operand:V4SI 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") (if_then_else:VEC_I (match_operator 3 "comparison_operator" - [(match_operand:VEC_I 4 "vint_operand" "") - (match_operand:VEC_I 5 "vint_operand" "")]) - (match_operand:VEC_I 1 "vint_operand" "") - (match_operand:VEC_I 2 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + [(match_operand:VEC_I 4 "vint_operand") + (match_operand:VEC_I 5 "vint_operand")]) + (match_operand:VEC_I 1 "vector_int_reg_or_same_bit") + (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vconduv4sfv4si" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (if_then_else:V4SF + (match_operator 3 "comparison_operator" + [(match_operand:V4SI 4 "vint_operand" "") + (match_operand:V4SI 5 "vint_operand" "")]) + (match_operand:V4SF 1 "vfloat_operand" "") + (match_operand:V4SF 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" " { if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], @@ -428,53 +507,191 @@ "") (define_expand "vector_ge<mode>" - [(set (match_operand:VEC_C 0 "vlogical_operand" "") - (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") - (match_operand:VEC_C 2 "vlogical_operand" "")))] + [(set (match_operand:VEC_F 0 "vlogical_operand" "") + (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand" "") + (match_operand:VEC_F 2 "vlogical_operand" "")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nlt<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + (define_expand "vector_gtu<mode>" [(set (match_operand:VEC_I 0 "vint_operand" "") (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nltu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + (define_expand "vector_geu<mode>" [(set (match_operand:VEC_I 0 "vint_operand" "") (geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; <= for integer vectors: apply not-greater-than +(define_expand "vector_ngt<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_expand "vector_ngtu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_insn_and_split "*vector_uneq<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (gt:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (gt:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (and:VEC_F (not:VEC_F (match_dup 3)) + (not:VEC_F (match_dup 4))))] +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}) + +(define_insn_and_split "*vector_ltgt<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (gt:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (gt:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (ior:VEC_F (match_dup 3) + (match_dup 4)))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +(define_insn_and_split "*vector_ordered<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (ge:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (ge:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (ior:VEC_F (match_dup 3) + (match_dup 4)))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +(define_insn_and_split "*vector_unordered<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (ge:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (ge:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (and:VEC_F (not:VEC_F (match_dup 3)) + (not:VEC_F (match_dup 4))))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + ;; Note the arguments for __builtin_altivec_vsel are op2, op1, mask ;; which is in the reverse order that we want (define_expand "vector_select_<mode>" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (if_then_else:VEC_L (ne:CC (match_operand:VEC_L 3 "vlogical_operand" "") - (const_int 0)) + (match_dup 4)) (match_operand:VEC_L 2 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") + "operands[4] = CONST0_RTX (<MODE>mode);") (define_expand "vector_select_<mode>_uns" [(set (match_operand:VEC_L 0 "vlogical_operand" "") (if_then_else:VEC_L (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand" "") - (const_int 0)) + (match_dup 4)) (match_operand:VEC_L 2 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") + "operands[4] = CONST0_RTX (<MODE>mode);") ;; Expansions that compare vectors producing a vector result and a predicate, ;; setting CR6 to indicate a combined status (define_expand "vector_eq_<mode>_p" [(parallel - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(eq:CC (match_operand:VEC_A 1 "vlogical_operand" "") (match_operand:VEC_A 2 "vlogical_operand" ""))] UNSPEC_PREDICATE)) @@ -484,9 +701,164 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_all_ne built-in functions on Power9. +(define_expand "vector_ne_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ne:CC (match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (ne:VI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_any_eq built-in functions on Power9. +(define_expand "vector_ae_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ne:CC (match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (ne:VI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_all_nez and vec_any_eqz built-in +;; functions on Power9. +(define_expand "vector_nez_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(unspec:VI + [(match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand")] + UNSPEC_NEZ_P)] + UNSPEC_PREDICATE)) + (set (match_operand:VI 0 "vlogical_operand") + (unspec:VI [(match_dup 1) + (match_dup 2)] + UNSPEC_NEZ_P))])] + "TARGET_P9_VECTOR" + "") + +;; This expansion handles the V2DI mode in the implementation of the +;; vec_all_ne built-in function on Power9. +;; +;; Since the Power9 "xvcmpne<mode>." instruction does not support DImode, +;; this expands into the same rtl that would be used for the Power8 +;; architecture. +(define_expand "vector_ne_v2di_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V2DI 1 "vlogical_operand") + (match_operand:V2DI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V2DI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (V2DImode); +}) + +;; This expansion handles the V2DI mode in the implementation of the +;; vec_any_eq built-in function on Power9. +;; +;; Since the Power9 "xvcmpne<mode>." instruction does not support DImode, +;; this expands into the same rtl that would be used for the Power8 +;; architecture. +(define_expand "vector_ae_v2di_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V2DI 1 "vlogical_operand") + (match_operand:V2DI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V2DI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (V2DImode); +}) + +;; This expansion handles the V4SF and V2DF modes in the Power9 +;; implementation of the vec_all_ne built-in functions. Note that the +;; expansions for this pattern with these modes makes no use of power9- +;; specific instructions since there are no new power9 instructions +;; for vector compare not equal with floating point arguments. +(define_expand "vector_ne_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VEC_F 1 "vlogical_operand") + (match_operand:VEC_F 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:VEC_F (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V4SF and V2DF modes in the Power9 +;; implementation of the vec_any_eq built-in functions. Note that the +;; expansions for this pattern with these modes makes no use of power9- +;; specific instructions since there are no new power9 instructions +;; for vector compare not equal with floating point arguments. +(define_expand "vector_ae_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VEC_F 1 "vlogical_operand") + (match_operand:VEC_F 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:VEC_F (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + (define_expand "vector_gt_<mode>_p" [(parallel - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(gt:CC (match_operand:VEC_A 1 "vlogical_operand" "") (match_operand:VEC_A 2 "vlogical_operand" ""))] UNSPEC_PREDICATE)) @@ -498,7 +870,7 @@ (define_expand "vector_ge_<mode>_p" [(parallel - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(ge:CC (match_operand:VEC_F 1 "vfloat_operand" "") (match_operand:VEC_F 2 "vfloat_operand" ""))] UNSPEC_PREDICATE)) @@ -510,7 +882,7 @@ (define_expand "vector_gtu_<mode>_p" [(parallel - [(set (reg:CC 74) + [(set (reg:CC CR6_REGNO) (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" ""))] UNSPEC_PREDICATE)) @@ -522,79 +894,83 @@ ;; AltiVec/VSX predicates. +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals zero (aka __CR6_EQ in altivec.h). (define_expand "cr6_test_for_zero" [(set (match_operand:SI 0 "register_operand" "=r") - (eq:SI (reg:CC 74) + (eq:SI (reg:CC CR6_REGNO) (const_int 0)))] "TARGET_ALTIVEC || TARGET_VSX" "") +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals one (aka __CR6_EQ_REV in altivec.h). (define_expand "cr6_test_for_zero_reverse" [(set (match_operand:SI 0 "register_operand" "=r") - (eq:SI (reg:CC 74) + (eq:SI (reg:CC CR6_REGNO) (const_int 0))) - (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] "TARGET_ALTIVEC || TARGET_VSX" "") +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals two (aka __CR6_LT in altivec.h). (define_expand "cr6_test_for_lt" [(set (match_operand:SI 0 "register_operand" "=r") - (lt:SI (reg:CC 74) + (lt:SI (reg:CC CR6_REGNO) (const_int 0)))] "TARGET_ALTIVEC || TARGET_VSX" "") +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals three +;; (aka __CR6_LT_REV in altivec.h). (define_expand "cr6_test_for_lt_reverse" [(set (match_operand:SI 0 "register_operand" "=r") - (lt:SI (reg:CC 74) + (lt:SI (reg:CC CR6_REGNO) (const_int 0))) - (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] "TARGET_ALTIVEC || TARGET_VSX" "") -;; Vector logical instructions -(define_expand "xor<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") +;; Vector count leading zeros +(define_expand "clz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") -(define_expand "ior<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") +;; Vector count trailing zeros +(define_expand "ctz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (ctz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P9_VECTOR") -(define_expand "and<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - -(define_expand "one_cmpl<mode>2" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") +;; Vector population count +(define_expand "popcount<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") -(define_expand "nor<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" ""))))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") +;; Vector parity +(define_expand "parity<mode>2" + [(set (match_operand:VEC_IP 0 "register_operand" "") + (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand" "")))] + "TARGET_P9_VECTOR") -(define_expand "andc<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" "")) - (match_operand:VEC_L 1 "vlogical_operand" "")))] - "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - + ;; Same size conversions (define_expand "float<VEC_int><mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -609,7 +985,7 @@ } }") -(define_expand "unsigned_float<VEC_int><mode>2" +(define_expand "floatuns<VEC_int><mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (unsigned_float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" @@ -650,7 +1026,7 @@ ;; Vector initialization, set, extract -(define_expand "vec_init<mode>" +(define_expand "vec_init<mode><VEC_base_l>" [(match_operand:VEC_E 0 "vlogical_operand" "") (match_operand:VEC_E 1 "" "")] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" @@ -669,72 +1045,15 @@ DONE; }) -(define_expand "vec_extract<mode>" +(define_expand "vec_extract<mode><VEC_base_l>" [(match_operand:<VEC_base> 0 "register_operand" "") (match_operand:VEC_E 1 "vlogical_operand" "") (match_operand 2 "const_int_operand" "")] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" { - rs6000_expand_vector_extract (operands[0], operands[1], - INTVAL (operands[2])); + rs6000_expand_vector_extract (operands[0], operands[1], operands[2]); DONE; }) - -;; Interleave patterns -(define_expand "vec_interleave_highv4sf" - [(set (match_operand:V4SF 0 "vfloat_operand" "") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" - "") - -(define_expand "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "vfloat_operand" "") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" - "") - -(define_expand "vec_interleave_high<mode>" - [(set (match_operand:VEC_64 0 "vfloat_operand" "") - (vec_concat:VEC_64 - (vec_select:<VEC_base> (match_operand:VEC_64 1 "vfloat_operand" "") - (parallel [(const_int 0)])) - (vec_select:<VEC_base> (match_operand:VEC_64 2 "vfloat_operand" "") - (parallel [(const_int 0)]))))] - "VECTOR_UNIT_VSX_P (<MODE>mode)" - "") - -(define_expand "vec_interleave_low<mode>" - [(set (match_operand:VEC_64 0 "vfloat_operand" "") - (vec_concat:VEC_64 - (vec_select:<VEC_base> (match_operand:VEC_64 1 "vfloat_operand" "") - (parallel [(const_int 1)])) - (vec_select:<VEC_base> (match_operand:VEC_64 2 "vfloat_operand" "") - (parallel [(const_int 1)]))))] - "VECTOR_UNIT_VSX_P (<MODE>mode)" - "") - ;; Convert double word types to single word types (define_expand "vec_pack_trunc_v2df" @@ -748,7 +1067,7 @@ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsp (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -763,7 +1082,7 @@ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -778,7 +1097,7 @@ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -790,7 +1109,7 @@ { rtx reg = gen_reg_rtx (V4SFmode); - emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); DONE; }) @@ -802,7 +1121,7 @@ { rtx reg = gen_reg_rtx (V4SFmode); - emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); DONE; }) @@ -814,7 +1133,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); DONE; }) @@ -826,7 +1145,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); DONE; }) @@ -838,7 +1157,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); DONE; }) @@ -850,7 +1169,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); DONE; }) @@ -864,66 +1183,37 @@ (match_operand:V16QI 3 "vlogical_operand" "")] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" { - emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2], - operands[3])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], + operands[2], operands[3])); + else + { + /* We have changed lvsr to lvsl, so to complete the transformation + of vperm for LE, we must swap the inputs. */ + rtx unspec = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[2], + operands[1], operands[3]), + UNSPEC_VPERM); + emit_move_insn (operands[0], unspec); + } DONE; }) ;; Under VSX, vectors of 4/8 byte alignments do not need to be aligned ;; since the load already handles it. (define_expand "movmisalign<mode>" - [(set (match_operand:VEC_N 0 "vfloat_operand" "") - (match_operand:VEC_N 1 "vfloat_operand" ""))] + [(set (match_operand:VEC_N 0 "nonimmediate_operand" "") + (match_operand:VEC_N 1 "any_operand" ""))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN" "") - -;; Vector shift left in bits. Currently supported ony for shift -;; amounts that can be expressed as byte shifts (divisible by 8). -;; General shift amounts can be supported using vslo + vsl. We're -;; not expecting to see these yet (the vectorizer currently -;; generates only shifts divisible by byte_size). -(define_expand "vec_shl_<mode>" - [(match_operand:VEC_L 0 "vlogical_operand" "") - (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:QI 2 "reg_or_short_operand" "")] - "TARGET_ALTIVEC" - " -{ - rtx bitshift = operands[2]; - rtx shift; - rtx insn; - HOST_WIDE_INT bitshift_val; - HOST_WIDE_INT byteshift_val; - - if (! CONSTANT_P (bitshift)) - FAIL; - bitshift_val = INTVAL (bitshift); - if (bitshift_val & 0x7) - FAIL; - byteshift_val = bitshift_val >> 3; - if (TARGET_VSX && (byteshift_val & 0x3) == 0) - { - shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); - insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1], - shift); - } - else - { - shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], - shift); - } - - emit_insn (insn); - DONE; -}") - ;; Vector shift right in bits. Currently supported ony for shift ;; amounts that can be expressed as byte shifts (divisible by 8). ;; General shift amounts can be supported using vsro + vsr. We're ;; not expecting to see these yet (the vectorizer currently -;; generates only shifts divisible by byte_size). +;; generates only shifts by a whole number of vector elements). +;; Note that the vec_shr operation is actually defined as +;; 'shift toward element 0' so is a shr for LE and shl for BE. (define_expand "vec_shr_<mode>" [(match_operand:VEC_L 0 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "") @@ -934,6 +1224,7 @@ rtx bitshift = operands[2]; rtx shift; rtx insn; + rtx zero_reg, op1, op2; HOST_WIDE_INT bitshift_val; HOST_WIDE_INT byteshift_val; @@ -942,18 +1233,30 @@ bitshift_val = INTVAL (bitshift); if (bitshift_val & 0x7) FAIL; - byteshift_val = 16 - (bitshift_val >> 3); + byteshift_val = (bitshift_val >> 3); + zero_reg = gen_reg_rtx (<MODE>mode); + emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode)); + if (!BYTES_BIG_ENDIAN) + { + byteshift_val = 16 - byteshift_val; + op1 = zero_reg; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = zero_reg; + } + if (TARGET_VSX && (byteshift_val & 0x3) == 0) { shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); - insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift); } else { shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift); } emit_insn (insn); @@ -965,7 +1268,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift left on each vector element @@ -973,7 +1276,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for logical shift right on each vector element @@ -981,7 +1284,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift right on each vector element @@ -989,99 +1292,30 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -;;; Expanders for vector insn patterns shared between the SPE and TARGET_PAIRED systems. - -(define_expand "absv2sf2" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - "") - -(define_expand "negv2sf2" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - "") +;; Vector reduction expanders for VSX +; The (VEC_reduc:... +; (op1) +; (unspec:... [(const_int 0)] UNSPEC_REDUC)) +; +; is to allow us to use a code iterator, but not completely list all of the +; vector rotates, etc. to prevent canonicalization -(define_expand "addv2sf3" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - " -{ - if (TARGET_SPE) - { - /* We need to make a note that we clobber SPEFSCR. */ - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); - - XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_PLUS (V2SFmode, operands[1], operands[2])); - XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); - emit_insn (par); - DONE; - } -}") - -(define_expand "subv2sf3" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - " -{ - if (TARGET_SPE) - { - /* We need to make a note that we clobber SPEFSCR. */ - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); - XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_MINUS (V2SFmode, operands[1], operands[2])); - XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); - emit_insn (par); - DONE; - } -}") - -(define_expand "mulv2sf3" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - " -{ - if (TARGET_SPE) - { - /* We need to make a note that we clobber SPEFSCR. */ - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); - - XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_MULT (V2SFmode, operands[1], operands[2])); - XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); - emit_insn (par); - DONE; - } -}") - -(define_expand "divv2sf3" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "") - (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "")))] - "TARGET_PAIRED_FLOAT || TARGET_SPE" - " -{ - if (TARGET_SPE) - { - /* We need to make a note that we clobber SPEFSCR. */ - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); - - XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_DIV (V2SFmode, operands[1], operands[2])); - XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); - emit_insn (par); - DONE; - } -}") +(define_expand "reduc_<VEC_reduc:VEC_reduc_name>_scal_<VEC_F:mode>" + [(match_operand:<VEC_base> 0 "register_operand" "") + (VEC_reduc:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (unspec:VEC_F [(const_int 0)] UNSPEC_REDUC))] + "VECTOR_UNIT_VSX_P (<VEC_F:MODE>mode)" + { + rtx vec = gen_reg_rtx (<VEC_F:MODE>mode); + rtx elt = BYTES_BIG_ENDIAN + ? gen_int_mode (GET_MODE_NUNITS (<VEC_F:MODE>mode) - 1, QImode) + : const0_rtx; + emit_insn (gen_vsx_reduc_<VEC_reduc:VEC_reduc_name>_<VEC_F:mode> (vec, + operand1)); + emit_insn (gen_vsx_extract_<VEC_F:mode> (operand0, vec, elt)); + DONE; + })