diff gcc/config/aarch64/aarch64-sve-builtins-base.cc @ 145:1830386684a0
gcc-9.2.0
author   | anatofuz
date     | Thu, 13 Feb 2020 11:34:05 +0900
parents  |
children |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc Thu Feb 13 11:34:05 2020 +0900 @@ -0,0 +1,2820 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "recog.h" +#include "expr.h" +#include "basic-block.h" +#include "function.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "explow.h" +#include "emit-rtl.h" +#include "tree-vector-builder.h" +#include "rtx-vector-builder.h" +#include "vec-perm-indices.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-shapes.h" +#include "aarch64-sve-builtins-base.h" +#include "aarch64-sve-builtins-functions.h" + +using namespace aarch64_sve; + +namespace { + +/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */ +static int +unspec_cmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_CMLA; + case 90: return UNSPEC_CMLA90; + case 180: return UNSPEC_CMLA180; + case 270: return UNSPEC_CMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_FCMLA; + case 90: return UNSPEC_FCMLA90; + case 180: return UNSPEC_FCMLA180; + case 270: return UNSPEC_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_cond_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_COND_FCMLA; + case 90: return UNSPEC_COND_FCMLA90; + case 180: return UNSPEC_COND_FCMLA180; + case 270: return UNSPEC_COND_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Expand a call to svmad, or svmla after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. */ +static rtx +expand_mad (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +/* Expand a call to svmla_lane or svmls_lane using floating-point unspec + UNSPEC. */ +static rtx +expand_mla_mls_lane (function_expander &e, int unspec) +{ + /* Put the operands in the normal (fma ...) order, with the accumulator + last. This fits naturally since that's also the unprinted operand + in the asm output. 
*/ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); +} + +/* Expand a call to svmsb, or svmls after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. */ +static rtx +expand_msb (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fnma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fnma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +class svabd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The integer operations are represented as the subtraction of the + minimum from the maximum, with the signedness of the instruction + keyed off the signedness of the maximum operation. */ + rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX; + insn_code icode; + if (e.pred == PRED_x) + { + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_pred_abd (e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_cond_abd (e.vector_mode (0)); + return e.use_cond_insn (icode); + } +}; + +/* Implements svacge, svacgt, svacle and svaclt. */ +class svac_impl : public function_base +{ +public: + CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The unspec code for the underlying comparison. */ + int m_unspec; +}; + +class svadda_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the predicate last, as required by mask_fold_left_plus_optab. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements svadr[bhwd]. */ +class svadr_bhwd_impl : public function_base +{ +public: + CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + if (m_shift == 0) + return e.use_exact_insn (code_for_aarch64_adr (mode)); + + /* Turn the access size into an extra shift argument. */ + rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode)); + e.args.quick_push (expand_vector_broadcast (mode, shift)); + return e.use_exact_insn (code_for_aarch64_adr_shift (mode)); + } + + /* How many bits left to shift the vector displacement. */ + unsigned int m_shift; +}; + +class svbic_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert svbic of a constant into svand of its inverse. 
*/ + if (CONST_INT_P (e.args[2])) + { + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); + return e.map_to_rtx_codes (AND, AND, -1); + } + + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z); + } + + if (e.pred == PRED_x) + return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0))); + + return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0))); + } +}; + +/* Implements svbrkn, svbrkpa and svbrkpb. */ +class svbrk_binary_impl : public function_base +{ +public: + CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +/* Implements svbrka and svbrkb. */ +class svbrk_unary_impl : public function_base +{ +public: + CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_cond_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (rot == 90) + return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90, + UNSPEC_COND_FCADD90); + if (rot == 270) + return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270, + UNSPEC_COND_FCADD270); + gcc_unreachable (); + } +}; + +/* Implements svclasta and svclastb. */ +class svclast_impl : public quiet<function_base> +{ +public: + CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Match the fold_extract_optab order. */ + std::swap (e.args[0], e.args[1]); + machine_mode mode = e.vector_mode (0); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + icode = code_for_fold_extract (m_unspec, mode); + else + icode = code_for_aarch64_fold_extract_vector (m_unspec, mode); + return e.use_exact_insn (icode); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. */ + e.rotate_inputs_left (1, 4); + return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3); + } + else + { + int cmla = unspec_cmla (rot); + return e.map_to_unspecs (cmla, cmla, -1); + } + } +}; + +class svcmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + machine_mode mode = e.vector_mode (0); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. 
*/ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode); + return e.use_exact_insn (icode); + } + else + { + insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode); + return e.use_exact_insn (icode); + } + } +}; + +/* Implements svcmp<cc> (except svcmpuo, which is handled separately). */ +class svcmp_impl : public function_base +{ +public: + CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) + : m_code (code), m_unspec_for_fp (unspec_for_fp) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pg = gimple_call_arg (f.call, 0); + tree rhs1 = gimple_call_arg (f.call, 1); + tree rhs2 = gimple_call_arg (f.call, 2); + + /* Convert a ptrue-predicated integer comparison into the corresponding + gimple-level operation. */ + if (integer_all_onesp (pg) + && f.type_suffix (0).element_bytes == 1 + && f.type_suffix (0).integer_p) + { + gimple_seq stmts = NULL; + rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, m_code, rhs1, rhs2); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + if (e.type_suffix (0).integer_p) + { + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode)); + } + + insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode); + return e.use_exact_insn (icode); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec code to use for floating-point comparisons. */ + int m_unspec_for_fp; +}; + +/* Implements svcmp<cc>_wide. */ +class svcmp_wide_impl : public function_base +{ +public: + CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint, + int unspec_for_uint) + : m_code (code), m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + /* If the argument is a constant that the unwidened comparisons + can handle directly, use them instead. */ + insn_code icode = code_for_aarch64_pred_cmp (code, mode); + rtx op2 = unwrap_const_vec_duplicate (e.args[3]); + if (CONSTANT_P (op2) + && insn_data[icode].operand[4].predicate (op2, DImode)) + { + e.args[3] = op2; + return e.use_exact_insn (icode); + } + + int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); + return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode)); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec codes for signed and unsigned wide comparisons + respectively. 
*/ + int m_unspec_for_sint; + int m_unspec_for_uint; +}; + +class svcmpuo_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0))); + } +}; + +class svcnot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + if (e.pred == PRED_x) + { + /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs + a ptrue hint. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); + } + + return e.use_cond_insn (code_for_cond_cnot (mode), 0); + } +}; + +/* Implements svcnt[bhwd], which count the number of elements + in a particular vector mode. */ +class svcnt_bhwd_impl : public function_base +{ +public: + CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), + GET_MODE_NUNITS (m_ref_mode)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode); + } + + /* The mode of the vector associated with the [bhwd] suffix. */ + machine_mode m_ref_mode; +}; + +/* Implements svcnt[bhwd]_pat. */ +class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl +{ +public: + CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode) + : svcnt_bhwd_impl (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svcvnt[bwhd]_pat (SV_ALL) == svcnt[bwhd] (). */ + return svcnt_bhwd_impl::fold (f); + + /* See whether we can count the number of elements in the pattern + at compile time. */ + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), value); + return gimple_build_assign (f.lhs, count); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + e.args.quick_push (gen_int_mode (elements_per_vq, DImode)); + e.args.quick_push (const1_rtx); + return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat); + } +}; + +class svcntp_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); + } +}; + +/* Implements svcreate2, svcreate3 and svcreate4. */ +class svcreate_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + unsigned int nargs = gimple_call_num_args (f.call); + tree lhs_type = TREE_TYPE (f.lhs); + + /* Replace the call with a clobber of the result (to prevent it from + becoming upwards exposed) followed by stores into each individual + vector of tuple. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the final vector store. 
*/ + gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type)); + + for (unsigned int i = nargs; i-- > 0; ) + { + tree rhs_vector = gimple_call_arg (f.call, i); + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + unshare_expr (f.lhs), field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, size_int (i), + NULL_TREE, NULL_TREE); + gassign *assign = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, assign, GSI_SAME_STMT); + } + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + + /* Record that LHS_TUPLE is dead before the first store. */ + emit_clobber (lhs_tuple); + for (unsigned int i = 0; i < e.args.length (); ++i) + { + /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]), + lhs_tuple, GET_MODE (lhs_tuple), + i * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, e.args[i]); + } + return lhs_tuple; + } +}; + +class svcvt_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode0 = e.vector_mode (0); + machine_mode mode1 = e.vector_mode (1); + insn_code icode; + /* All this complication comes from the need to select four things + simultaneously: + + (1) the kind of conversion (int<-float, float<-int, float<-float) + (2) signed vs. unsigned integers, where relevant + (3) the predication mode, which must be the wider of the predication + modes for MODE0 and MODE1 + (4) the predication type (m, x or z) + + The only supported int<->float conversions for which the integer is + narrower than the float are SI<->DF. It's therefore more convenient + to handle (3) by defining two patterns for int<->float conversions: + one in which the integer is at least as wide as the float and so + determines the predication mode, and another single SI<->DF pattern + in which the float's mode determines the predication mode (which is + always VNx2BI in that case). + + The names of the patterns follow the optab convention of giving + the source mode before the destination mode. */ + if (e.type_suffix (1).integer_p) + { + int unspec = (e.type_suffix (1).unsigned_p + ? UNSPEC_COND_UCVTF + : UNSPEC_COND_SCVTF); + if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0) + : code_for_cond_nonextend (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_extend (unspec, mode1, mode0) + : code_for_cond_extend (unspec, mode1, mode0)); + } + else + { + int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT + : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU + : UNSPEC_COND_FCVTZS); + if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0) + : code_for_cond_nontrunc (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_trunc (unspec, mode1, mode0) + : code_for_cond_trunc (unspec, mode1, mode0)); + } + + if (e.pred == PRED_x) + return e.use_pred_x_insn (icode); + return e.use_cond_insn (icode); + } +}; + +class svdot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* In the optab, the multiplication operands come before the accumulator + operand. 
The optab is keyed off the multiplication mode. */ + e.rotate_inputs_left (0, 3); + insn_code icode + = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, + 0, GET_MODE (e.args[0])); + return e.use_unpred_insn (icode); + } +}; + +class svdotprod_lane_impl : public unspec_based_function_base +{ +public: + CONSTEXPR svdotprod_lane_impl (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_float) + : unspec_based_function_base (unspec_for_sint, + unspec_for_uint, + unspec_for_float) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Use the same ordering as the dot_prod_optab, with the + accumulator last. */ + e.rotate_inputs_left (0, 4); + int unspec = unspec_for (e); + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); + } +}; + +class svdup_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1); + + if (f.pred == PRED_none || f.pred == PRED_x) + { + if (CONSTANT_CLASS_P (rhs)) + { + if (f.type_suffix (0).bool_p) + return (tree_to_shwi (rhs) + ? f.fold_to_ptrue () + : f.fold_to_pfalse ()); + + tree rhs_vector = build_vector_from_val (vec_type, rhs); + return gimple_build_assign (f.lhs, rhs_vector); + } + + /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we + would need to introduce an extra and unwanted conversion to + the truth vector element type. */ + if (!f.type_suffix (0).bool_p) + return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); + } + + /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */ + if (f.pred == PRED_z) + { + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vec_type, 0); + rhs = f.force_vector (stmts, vec_type, rhs); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs, + build_zero_cst (vec_type)); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.pred == PRED_none || e.pred == PRED_x) + /* There's no benefit to using predicated instructions for _x here. */ + return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); + + /* Model predicated svdups as a SEL in which the "true" value is + the duplicate of the function argument and the "false" value + is the value of inactive lanes. */ + insn_code icode; + machine_mode mode = e.vector_mode (0); + if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) + /* Duplicate the constant to fill a vector. The pattern optimizes + various cases involving constant operands, falling back to SEL + if necessary. */ + icode = code_for_vcond_mask (mode, mode); + else + /* Use the pattern for selecting between a duplicated scalar + variable and a vector fallback. */ + icode = code_for_aarch64_sel_dup (mode); + return e.use_vcond_mask_insn (icode); + } +}; + +class svdup_lane_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The native DUP lane has an index range of 64 bytes. */ + machine_mode mode = e.vector_mode (0); + if (CONST_INT_P (e.args[1]) + && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63)) + return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode)); + + /* Treat svdup_lane as if it were svtbl_n. 
*/ + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +class svdupq_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + unsigned int nargs = gimple_call_num_args (f.call); + /* For predicates, pad out each argument so that we have one element + per bit. */ + unsigned int factor = (f.type_suffix (0).bool_p + ? f.type_suffix (0).element_bytes : 1); + tree_vector_builder builder (vec_type, nargs * factor, 1); + for (unsigned int i = 0; i < nargs; ++i) + { + tree elt = gimple_call_arg (f.call, i); + if (!CONSTANT_CLASS_P (elt)) + return NULL; + builder.quick_push (elt); + for (unsigned int j = 1; j < factor; ++j) + builder.quick_push (build_zero_cst (TREE_TYPE (vec_type))); + } + return gimple_build_assign (f.lhs, builder.build ()); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + unsigned int elements_per_vq = e.args.length (); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Construct a vector of integers so that we can compare them against + zero below. Zero vs. nonzero is the only distinction that + matters. */ + mode = aarch64_sve_int_mode (mode); + for (unsigned int i = 0; i < elements_per_vq; ++i) + e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode), + e.args[i], QImode); + } + + /* Get the 128-bit Advanced SIMD vector for this data size. */ + scalar_mode element_mode = GET_MODE_INNER (mode); + machine_mode vq_mode = aarch64_vq_mode (element_mode).require (); + gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode))); + + /* Put the arguments into a 128-bit Advanced SIMD vector. We want + argument N to go into architectural lane N, whereas Advanced SIMD + vectors are loaded memory lsb to register lsb. We therefore need + to reverse the elements for big-endian targets. */ + rtx vq_reg = gen_reg_rtx (vq_mode); + rtvec vec = rtvec_alloc (elements_per_vq); + for (unsigned int i = 0; i < elements_per_vq; ++i) + { + unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i; + RTVEC_ELT (vec, i) = e.args[argno]; + } + aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec)); + + /* If the result is a boolean, compare the data vector against zero. */ + if (mode != e.vector_mode (0)) + { + rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg); + return aarch64_convert_sve_data_to_pred (e.possible_target, + e.vector_mode (0), data_dupq); + } + + return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg); + } +}; + +class svdupq_lane_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + rtx index = e.args[1]; + if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3)) + { + /* Use the .Q form of DUP, which is the native instruction for + this function. */ + insn_code icode = code_for_aarch64_sve_dupq_lane (mode); + unsigned int num_indices = e.elements_per_vq (0); + rtx indices = aarch64_gen_stepped_int_parallel + (num_indices, INTVAL (index) * num_indices, 1); + + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + e.add_fixed_operand (indices); + return e.generate_insn (icode); + } + + /* Build a .D TBL index for the pairs of doublewords that we want to + duplicate. */ + if (CONST_INT_P (index)) + { + /* The index vector is a constant. 
*/ + rtx_vector_builder builder (VNx2DImode, 2, 1); + builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode)); + builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode)); + index = builder.build (); + } + else + { + /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec + explicitly allows the top of the index to be dropped. */ + index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode, + index, const1_rtx)); + index = expand_vector_broadcast (VNx2DImode, index); + + /* Get an alternating 0, 1 predicate. */ + rtx_vector_builder builder (VNx2BImode, 2, 1); + builder.quick_push (const0_rtx); + builder.quick_push (constm1_rtx); + rtx pg = force_reg (VNx2BImode, builder.build ()); + + /* Add one to the odd elements of the index. */ + rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode)); + rtx target = gen_reg_rtx (VNx2DImode); + emit_insn (gen_cond_addvnx2di (target, pg, index, one, index)); + index = target; + } + + e.args[0] = gen_lowpart (VNx2DImode, e.args[0]); + e.args[1] = index; + return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di); + } +}; + +/* Implements svextb, svexth and svextw. */ +class svext_bhw_impl : public function_base +{ +public: + CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode) + : m_from_mode (from_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).unsigned_p) + { + /* Convert to an AND. The widest we go is 0xffffffff, which fits + in a CONST_INT. */ + e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode))); + if (e.pred == PRED_m) + /* We now have arguments "(inactive, pg, op, mask)". Convert this + to "(pg, op, mask, inactive)" so that the order matches svand_m + with an extra argument on the end. Take the inactive elements + from this extra argument. */ + e.rotate_inputs_left (0, 4); + return e.map_to_rtx_codes (AND, AND, -1, 3); + } + + machine_mode wide_mode = e.vector_mode (0); + poly_uint64 nunits = GET_MODE_NUNITS (wide_mode); + machine_mode narrow_mode + = aarch64_sve_data_mode (m_from_mode, nunits).require (); + if (e.pred == PRED_x) + { + insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode); + return e.use_pred_x_insn (icode); + } + + insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode); + return e.use_cond_insn (icode); + } + + /* The element mode that we're extending from. */ + scalar_int_mode m_from_mode; +}; + +/* Implements svget2, svget3 and svget4. */ +class svget_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svget_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Fold into a normal gimple component access. */ + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + rhs_tuple, field, NULL_TREE); + tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs), + rhs_array, index, NULL_TREE, NULL_TREE); + return gimple_build_assign (f.lhs, rhs_vector); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Fold the access into a subreg rvalue. 
*/ + return simplify_gen_subreg (e.vector_mode (0), e.args[0], + GET_MODE (e.args[0]), + INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR); + } +}; + +class svindex_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (e.direct_optab_handler (vec_series_optab)); + } +}; + +class svinsr_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = direct_optab_handler (vec_shl_insert_optab, + e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svlasta and svlastb. */ +class svlast_impl : public quiet<function_base> +{ +public: + CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0))); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svld1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskload_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements extending contiguous forms of svld1. */ +class svld1_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_gather_load_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements extending forms of svld1_gather. 
*/ +class svld1_gather_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the extending gathers use the same + operand order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class load_replicate : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + tree + memory_scalar_type (const function_instance &fi) const OVERRIDE + { + return fi.scalar_type (0); + } +}; + +class svld1rq_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1ro_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return OImode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svld2, svld3 and svld4. */ +class svld234_impl : public full_width_access +{ +public: + CONSTEXPR svld234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree tuple_type = TREE_TYPE (f.lhs); + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* Emit two statements: a clobber of the lhs, so that it isn't + upwards exposed, and then the load itself. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the load. */ + gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type)); + + /* View the loaded data as an array of vectors. */ + tree field = tuple_type_field (tuple_type); + tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), + unshare_expr (f.lhs)); + + /* Emit the load itself. 
*/ + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, lhs_array); + gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT); + + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); + insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svldff1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode)); + } +}; + +/* Implements extending forms of svldff1_gather. */ +class svldff1_gather_extend : public extending_load +{ +public: + CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class svldnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svldff1 and svldnf1. */ +class svldxf1_impl : public full_width_access +{ +public: + CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + machine_mode mode = e.vector_mode (0); + return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode)); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +/* Implements extending contiguous forms of svldff1 and svldnf1. 
*/ +class svldxf1_extend_impl : public extending_load +{ +public: + CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec) + : extending_load (memory_type), m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +class svlen_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* The argument only exists for its type. */ + tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0)); + tree count = build_int_cstu (TREE_TYPE (f.lhs), + TYPE_VECTOR_SUBPARTS (rhs_type)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The argument only exists for its type. */ + return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode); + } +}; + +class svmad_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_mad (e); + } +}; + +class svmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_mad (e, 3); + } +}; + +class svmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLA); + } +}; + +class svmls_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_msb (e, 3); + } +}; + +class svmov_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return gimple_build_assign (f.lhs, BIT_AND_EXPR, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B" + is "AND Pa.B, Pb/Z, Pc.B, Pc.B". 
*/ + gcc_assert (e.pred == PRED_z); + e.args.quick_push (e.args[1]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z); + } +}; + +class svmls_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLS); + } +}; + +class svmmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode; + if (e.type_suffix (0).integer_p) + { + if (e.type_suffix (0).unsigned_p) + icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0)); + else + icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); + } + else + icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +class svmsb_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_msb (e); + } +}; + +class svnand_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z); + } +}; + +class svnor_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z); + } +}; + +class svnot_impl : public rtx_code_function +{ +public: + CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B" + is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */ + gcc_assert (e.pred == PRED_z); + e.args.quick_insert (1, e.args[0]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z); + } + return rtx_code_function::expand (e); + } +}; + +class svorn_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z); + } +}; + +class svpfalse_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_pfalse (); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return CONST0_RTX (VNx16BImode); + } +}; + +/* Implements svpfirst and svpnext, which share the same .md patterns. */ +class svpfirst_svpnext_impl : public function_base +{ +public: + CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode)); + } + + /* The unspec associated with the operation. */ + int m_unspec; +}; + +/* Implements contiguous forms of svprf[bhwd]. 
*/ +class svprf_bhwd_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + insn_code icode = code_for_aarch64_sve_prefetch (m_mode); + return e.use_contiguous_prefetch_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svprf[bhwd]_gather. */ +class svprf_bhwd_gather_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return m_mode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + e.prepare_gather_address_operands (1); + + /* Insert a zero operand to identify the mode of the memory being + accessed. This goes between the gather operands and prefetch + operands created above. */ + e.args.quick_insert (5, CONST0_RTX (m_mode)); + + machine_mode reg_mode = GET_MODE (e.args[2]); + insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode); + return e.use_exact_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svptest_any, svptest_first and svptest_last. */ +class svptest_impl : public function_base +{ +public: + CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See whether GP is an exact ptrue for some predicate mode; + i.e. whether converting the GP to that mode will not drop + set bits and will leave all significant bits set. */ + machine_mode wide_mode; + int hint; + if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode)) + hint = SVE_KNOWN_PTRUE; + else + { + hint = SVE_MAYBE_NOT_PTRUE; + wide_mode = VNx16BImode; + } + + /* Generate the PTEST itself. */ + rtx pg = force_reg (VNx16BImode, e.args[0]); + rtx wide_pg = gen_lowpart (wide_mode, pg); + rtx hint_rtx = gen_int_mode (hint, DImode); + rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1])); + emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op)); + + /* Get the location of the boolean result. We can provide SImode and + DImode values directly; rely on generic code to convert others. */ + rtx target = e.possible_target; + if (!target + || !REG_P (target) + || (GET_MODE (target) != SImode && GET_MODE (target) != DImode)) + target = gen_reg_rtx (DImode); + + /* Generate a CSET to convert the CC result of the PTEST to a boolean. */ + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target), + cc_reg, const0_rtx); + emit_insn (gen_rtx_SET (target, compare)); + return target; + } + + /* The comparison code associated with ptest condition. 
*/ + rtx_code m_compare; +}; + +class svptrue_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_ptrue (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return aarch64_ptrue_all (e.type_suffix (0).element_bytes); + } +}; + +class svptrue_pat_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */ + return f.fold_to_ptrue (); + + /* See whether we can count the number of elements in the pattern + at compile time. If so, construct a predicate with that number + of 1s followed by all 0s. */ + int nelts_per_vq = f.elements_per_vq (0); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq); + if (value >= 0) + return f.fold_to_vl_pred (value); + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* In rtl, the predicate is represented as the constant: + + (const:V16BI (unspec:V16BI [(const_int PATTERN) + (const_vector:VnnBI [zeros])] + UNSPEC_PTRUE)) + + where nn determines the element size. */ + rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0))); + return gen_rtx_CONST (VNx16BImode, + gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE)); + } +}; + +/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */ +class svqdec_svqinc_bhwd_impl : public function_base +{ +public: + CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint, + rtx_code code_for_uint, + scalar_int_mode elem_mode) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint), + m_elem_mode (elem_mode) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Treat non-_pat functions in the same way as _pat functions with + an SV_ALL argument. */ + if (e.args.length () == 2) + e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode)); + + /* Insert the number of elements per 128-bit block as a fake argument, + between the pattern and the multiplier. Arguments 1, 2 and 3 then + correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see + aarch64_sve_cnt_pat for details. */ + unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode); + e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode)); + + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + + /* Choose between operating on integer scalars or integer vectors. */ + machine_mode mode = e.vector_mode (0); + if (e.mode_suffix_id == MODE_n) + mode = GET_MODE_INNER (mode); + return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode)); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; + + /* The integer mode associated with the [bhwd] suffix. */ + scalar_int_mode m_elem_mode; +}; + +/* Implements svqdec[bhwd]{,_pat}. */ +class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {} +}; + +/* Implements svqinc[bhwd]{,_pat}. */ +class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {} +}; + +/* Implements svqdecp and svqincp. 
*/ +class svqdecp_svqincp_impl : public function_base +{ +public: + CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint, + rtx_code code_for_uint) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + { + /* Increment or decrement a scalar (whose mode is given by the first + type suffix) by the number of active elements in a predicate + (whose mode is given by the second type suffix). */ + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1)); + } + else + /* Increment a vector by the number of active elements in a predicate, + with the vector mode determining the predicate mode. */ + icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; +}; + +class svrdffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_copy_ffr_to_ffrt ()); + rtx result = e.use_exact_insn (e.pred == PRED_z + ? CODE_FOR_aarch64_rdffr_z + : CODE_FOR_aarch64_rdffr); + emit_insn (gen_aarch64_update_ffrt ()); + return result; + } +}; + +class svreinterpret_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. */ + if (!targetm.can_change_mode_class (f.vector_mode (0), + f.vector_mode (1), FP_REGS)) + return NULL; + + /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR + reinterpretation. */ + tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs), + gimple_call_arg (f.call, 0)); + return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } +}; + +class svrev_impl : public permute +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt for now on _b16 and wider; we'd need more complex evpc logic + to rerecognize the result. */ + if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) + return NULL; + + /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ + poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (int i = 0; i < 3; ++i) + builder.quick_push (nelts - i - 1); + return fold_permute (f, builder); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); + } +}; + +class svsel_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svsel corresponds exactly to VEC_COND_EXPR. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, f.vector_type (0), 0); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, + gimple_call_arg (f.call, 1), + gimple_call_arg (f.call, 2)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ + e.rotate_inputs_left (0, 3); + insn_code icode = convert_optab_handler (vcond_mask_optab, + e.vector_mode (0), + e.gp_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svset2, svset3 and svset4. */ +class svset_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svset_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree rhs_vector = gimple_call_arg (f.call, 2); + + /* Replace the call with two statements: a copy of the full tuple + to the call result, followed by an update of the individual vector. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the copy statement + rather than the field update. */ + gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); + + /* Get a reference to the individual vector. */ + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + f.lhs, field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, index, NULL_TREE, NULL_TREE); + gassign *update = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, update, GSI_SAME_STMT); + + return copy; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx rhs_tuple = e.args[0]; + unsigned int index = INTVAL (e.args[1]); + rtx rhs_vector = e.args[2]; + + /* First copy the full tuple to the target register. */ + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + emit_move_insn (lhs_tuple, rhs_tuple); + + /* ...then update the individual vector. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), + lhs_tuple, GET_MODE (lhs_tuple), + index * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, rhs_vector); + return lhs_vector; + } +}; + +class svsetffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +class svst1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); + return gimple_build_call_internal (IFN_MASK_STORE, 4, + base, cookie, pred, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskstore_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svst1_scatter_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_scatter_store_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating forms of svst1_scatter. */ +class svst1_scatter_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the truncating scatters use the same + operand order as mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + insn_code icode = code_for_aarch64_scatter_store_trunc + (e.memory_vector_mode (), e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating contiguous forms of svst1. */ +class svst1_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), + e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +/* Implements svst2, svst3 and svst4. */ +class svst234_impl : public full_width_access +{ +public: + CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* View the stored data as an array of vectors. 
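+ For example (illustrative), an svfloat32x2_t argument is viewed as + its underlying two-vector array, which is the operand form that the + IFN_MASK_STORE_LANES call built below expects.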
*/ + unsigned int num_args = gimple_call_num_args (f.call); + tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, + base, cookie, pred, rhs_array); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = GET_MODE (e.args.last ()); + insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svstnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svsub_impl : public rtx_code_function +{ +public: + CONSTEXPR svsub_impl () + : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Canonicalize subtractions of constants to additions. */ + machine_mode mode = e.vector_mode (0); + if (e.try_negating_argument (2, mode)) + return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); + + return rtx_code_function::expand (e); + } +}; + +class svtbl_impl : public permute +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +/* Implements svtrn1 and svtrn2. */ +class svtrn_impl : public binary_permute +{ +public: + CONSTEXPR svtrn_impl (int base) + : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } + svtrn2: as for svtrn1, but with 1 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (m_base + i * 2); + builder.quick_push (m_base + i * 2 + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svtrn1, 1 for svtrn2. */ + unsigned int m_base; +}; + +/* Base class for svundef{,2,3,4}. */ +class svundef_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Don't fold svundef at the gimple level. There's no exact + correspondence for SSA_NAMEs, and we explicitly don't want + to generate a specific value (like an all-zeros vector). */ + if (vectors_per_tuple () == 1) + return NULL; + return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs))); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx target = e.get_reg_target (); + emit_clobber (copy_rtx (target)); + return target; + } +}; + +/* Implements svunpklo and svunpkhi. */ +class svunpk_impl : public quiet<function_base> +{ +public: + CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Don't fold the predicate ops, since every bit of the svbool_t + result is significant. 
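+ Only the data forms are handled here; for example (illustrative), + svunpklo on an svint16_t argument becomes a VEC_UNPACK_*_EXPR below, + whereas svbool_t arguments are left to the expander, which uses the + PUNPKHI/PUNPKLO patterns instead.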
*/ + if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) + return NULL; + + /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian + and VEC_UNPACK_HI_EXPR for big-endian. */ + bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; + tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; + return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; + unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; + insn_code icode; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + icode = code_for_aarch64_sve_punpk (unpacku, mode); + else + { + int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; + icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); + } + return e.use_exact_insn (icode); + } + + /* True for svunpkhi, false for svunpklo. */ + bool m_high_p; +}; + +/* Also implements svsudot. */ +class svusdot_impl : public function_base +{ +public: + CONSTEXPR svusdot_impl (bool su) : m_su (su) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The implementation of the ACLE function svsudot (for the non-lane + version) is through the USDOT instruction but with the second and third + inputs swapped. */ + if (m_su) + e.rotate_inputs_left (1, 2); + /* The ACLE function has the same order requirements as for svdot. + While there's no requirement for the RTL pattern to have the same sort + of order as that for <sur>dot_prod, it's easier to read. + Hence we do the same rotation on arguments as svdot_impl does. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); + return e.use_exact_insn (icode); + } + +private: + bool m_su; +}; + +/* Implements svuzp1 and svuzp2. */ +class svuzp_impl : public binary_permute +{ +public: + CONSTEXPR svuzp_impl (unsigned int base) + : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svuzp1: { 0, 2, 4, 6, ... } + svuzp2: { 1, 3, 5, 7, ... }. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (unsigned int i = 0; i < 3; ++i) + builder.quick_push (m_base + i * 2); + return fold_permute (f, builder); + } + + /* 0 for svuzp1, 1 for svuzp2. */ + unsigned int m_base; +}; + +/* A function_base for svwhilele and svwhilelt functions. */ +class svwhilelx_impl : public while_comparison +{ +public: + CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) + : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p) + {} + + /* Try to fold a call by treating its arguments as constants of type T. */ + template<typename T> + gimple * + fold_type (gimple_folder &f) const + { + /* Only handle cases in which both operands are constant. */ + T arg0, arg1; + if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) + || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) + return NULL; + + /* Check whether the result is known to be all-false. */ + if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) + return f.fold_to_pfalse (); + + /* Punt if we can't tell at compile time whether the result + is all-false. */ + if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) + return NULL; + + /* At this point we know the result has at least one set element. 
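+ As an illustrative case, svwhilelt_b32 (0, 3) gives DIFF == 3, which + is known to fit within the predicate, so the code below folds the + call to a constant predicate in which exactly the first three + elements are set.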
*/ + poly_uint64 diff = arg1 - arg0; + poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); + + /* Canonicalize the svwhilele form to the svwhilelt form. Subtract + from NELTS rather than adding to DIFF, to prevent overflow. */ + if (m_eq_p) + nelts -= 1; + + /* Check whether the result is known to be all-true. */ + if (known_ge (diff, nelts)) + return f.fold_to_ptrue (); + + /* Punt if DIFF might not be the actual number of set elements + in the result. Conditional equality is fine. */ + if (maybe_gt (diff, nelts)) + return NULL; + + /* At this point we know that the predicate will have DIFF set elements + for svwhilelt and DIFF + 1 set elements for svwhilele (which stops + after rather than before ARG1 is reached). See if we can create + the predicate at compile time. */ + unsigned HOST_WIDE_INT vl; + if (diff.is_constant (&vl)) + /* Overflow is no longer possible after the checks above. */ + return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); + + return NULL; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (f.type_suffix (1).unsigned_p) + return fold_type<poly_uint64> (f); + else + return fold_type<poly_int64> (f); + } + + /* True for svwhilele, false for svwhilelt. */ + bool m_eq_p; +}; + +class svwrffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +/* Implements svzip1 and svzip2. */ +class svzip_impl : public binary_permute +{ +public: + CONSTEXPR svzip_impl (unsigned int base) + : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } + svzip2: as for svzip1, but with nelts / 2 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + poly_uint64 base = m_base * exact_div (nelts, 2); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (base + i); + builder.quick_push (base + i + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svzip1, 1 for svzip2.
*/ + unsigned int m_base; +}; + +} /* end anonymous namespace */ + +namespace aarch64_sve { + +FUNCTION (svabd, svabd_impl,) +FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS)) +FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) +FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) +FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) +FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) +FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) +FUNCTION (svadda, svadda_impl,) +FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) +FUNCTION (svadrb, svadr_bhwd_impl, (0)) +FUNCTION (svadrd, svadr_bhwd_impl, (3)) +FUNCTION (svadrh, svadr_bhwd_impl, (1)) +FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svand, rtx_code_function, (AND, AND)) +FUNCTION (svandv, reduction, (UNSPEC_ANDV)) +FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) +FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) +FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1)) +FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) +FUNCTION (svbfdot_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) +FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) +FUNCTION (svbfmlalb_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) +FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) +FUNCTION (svbfmlalt_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) +FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) +FUNCTION (svbic, svbic_impl,) +FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) +FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) +FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) +FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA)) +FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) +FUNCTION (svcadd, svcadd_impl,) +FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) +FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) +FUNCTION (svcls, unary_count, (CLRSB)) +FUNCTION (svclz, unary_count, (CLZ)) +FUNCTION (svcmla, svcmla_impl,) +FUNCTION (svcmla_lane, svcmla_lane_impl,) +FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) +FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, + UNSPEC_COND_CMPEQ_WIDE)) +FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) +FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, + UNSPEC_COND_CMPHS_WIDE)) +FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) +FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, + UNSPEC_COND_CMPHI_WIDE)) +FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) +FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, + UNSPEC_COND_CMPLS_WIDE)) +FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) +FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, + UNSPEC_COND_CMPLO_WIDE)) +FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) +FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, + UNSPEC_COND_CMPNE_WIDE)) +FUNCTION (svcmpuo, svcmpuo_impl,) +FUNCTION (svcnot, svcnot_impl,) +FUNCTION (svcnt, unary_count, (POPCOUNT)) +FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) +FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) +FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) +FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) +FUNCTION (svcnth, 
svcnt_bhwd_impl, (VNx8HImode)) +FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) +FUNCTION (svcntp, svcntp_impl,) +FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) +FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) +FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) +FUNCTION (svcreate2, svcreate_impl, (2)) +FUNCTION (svcreate3, svcreate_impl, (3)) +FUNCTION (svcreate4, svcreate_impl, (4)) +FUNCTION (svcvt, svcvt_impl,) +FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) +FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdot, svdot_impl,) +FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) +FUNCTION (svdup, svdup_impl,) +FUNCTION (svdup_lane, svdup_lane_impl,) +FUNCTION (svdupq, svdupq_impl,) +FUNCTION (svdupq_lane, svdupq_lane_impl,) +FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) +FUNCTION (sveorv, reduction, (UNSPEC_XORV)) +FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) +FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) +FUNCTION (svextb, svext_bhw_impl, (QImode)) +FUNCTION (svexth, svext_bhw_impl, (HImode)) +FUNCTION (svextw, svext_bhw_impl, (SImode)) +FUNCTION (svget2, svget_impl, (2)) +FUNCTION (svget3, svget_impl, (3)) +FUNCTION (svget4, svget_impl, (4)) +FUNCTION (svindex, svindex_impl,) +FUNCTION (svinsr, svinsr_impl,) +FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) +FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) +FUNCTION (svld1, svld1_impl,) +FUNCTION (svld1_gather, svld1_gather_impl,) +FUNCTION (svld1ro, svld1ro_impl,) +FUNCTION (svld1rq, svld1rq_impl,) +FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld2, svld234_impl, (2)) +FUNCTION (svld3, svld234_impl, (3)) +FUNCTION (svld4, svld234_impl, (4)) +FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) +FUNCTION (svldff1_gather, svldff1_gather_impl,) +FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) +FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) +FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) +FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) +FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) +FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) +FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) +FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) +FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) +FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) +FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1)) +FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) 
+FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) +FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) +FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) +FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) +FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) +FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) +FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) +FUNCTION (svldnt1, svldnt1_impl,) +FUNCTION (svlen, svlen_impl,) +FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) +FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) +FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) +FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) +FUNCTION (svmad, svmad_impl,) +FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) +FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) +FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) +FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) +FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) +FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) +FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) +FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) +FUNCTION (svmla, svmla_impl,) +FUNCTION (svmla_lane, svmla_lane_impl,) +FUNCTION (svmls, svmls_impl,) +FUNCTION (svmls_lane, svmls_lane_impl,) +FUNCTION (svmmla, svmmla_impl,) +FUNCTION (svmov, svmov_impl,) +FUNCTION (svmsb, svmsb_impl,) +FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL)) +FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) +FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, + UNSPEC_UMUL_HIGHPART, -1)) +FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) +FUNCTION (svnand, svnand_impl,) +FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG)) +FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnor, svnor_impl,) +FUNCTION (svnot, svnot_impl,) +FUNCTION (svorn, svorn_impl,) +FUNCTION (svorr, rtx_code_function, (IOR, IOR)) +FUNCTION (svorv, reduction, (UNSPEC_IORV)) +FUNCTION (svpfalse, svpfalse_impl,) +FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) +FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) +FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) +FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) +FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) +FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) +FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) +FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) +FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) +FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) +FUNCTION (svptest_any, svptest_impl, (NE)) +FUNCTION (svptest_first, svptest_impl, (LT)) +FUNCTION (svptest_last, svptest_impl, (LTU)) +FUNCTION (svptrue, svptrue_impl,) +FUNCTION (svptrue_pat, svptrue_pat_impl,) +FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1)) +FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) +FUNCTION 
(svqdecd_pat, svqdec_bhwd_impl, (DImode)) +FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) +FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) +FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1)) +FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) +FUNCTION (svrdffr, svrdffr_impl,) +FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE)) +FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) +FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) +FUNCTION (svreinterpret, svreinterpret_impl,) +FUNCTION (svrev, svrev_impl,) +FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) +FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) +FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) +FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) +FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) +FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) +FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) +FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) +FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) +FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) +FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE)) +FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) +FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) +FUNCTION (svsel, svsel_impl,) +FUNCTION (svset2, svset_impl, (2)) +FUNCTION (svset3, svset_impl, (3)) +FUNCTION (svset4, svset_impl, (4)) +FUNCTION (svsetffr, svsetffr_impl,) +FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) +FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) +FUNCTION (svst1, svst1_impl,) +FUNCTION (svst1_scatter, svst1_scatter_impl,) +FUNCTION (svst1b, svst1_truncate_impl, (QImode)) +FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) +FUNCTION (svst1h, svst1_truncate_impl, (HImode)) +FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) +FUNCTION (svst1w, svst1_truncate_impl, (SImode)) +FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) +FUNCTION (svst2, svst234_impl, (2)) +FUNCTION (svst3, svst234_impl, (3)) +FUNCTION (svst4, svst234_impl, (4)) +FUNCTION (svstnt1, svstnt1_impl,) +FUNCTION (svsub, svsub_impl,) +FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) +FUNCTION (svsudot, svusdot_impl, (true)) +FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) +FUNCTION (svtbl, svtbl_impl,) +FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) +FUNCTION (svtrn1, svtrn_impl, (0)) +FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, + UNSPEC_TRN1Q)) +FUNCTION (svtrn2, svtrn_impl, (1)) 
+FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, + UNSPEC_TRN2Q)) +FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) +FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) +FUNCTION (svundef, svundef_impl, (1)) +FUNCTION (svundef2, svundef_impl, (2)) +FUNCTION (svundef3, svundef_impl, (3)) +FUNCTION (svundef4, svundef_impl, (4)) +FUNCTION (svunpkhi, svunpk_impl, (true)) +FUNCTION (svunpklo, svunpk_impl, (false)) +FUNCTION (svusdot, svusdot_impl, (false)) +FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) +FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) +FUNCTION (svuzp1, svuzp_impl, (0)) +FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, + UNSPEC_UZP1Q)) +FUNCTION (svuzp2, svuzp_impl, (1)) +FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, + UNSPEC_UZP2Q)) +FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) +FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) +FUNCTION (svwrffr, svwrffr_impl,) +FUNCTION (svzip1, svzip_impl, (0)) +FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, + UNSPEC_ZIP1Q)) +FUNCTION (svzip2, svzip_impl, (1)) +FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, + UNSPEC_ZIP2Q)) + +} /* end namespace aarch64_sve */