diff gcc/config/aarch64/aarch64-sve-builtins-base.cc @ 145:1830386684a0
gcc-9.2.0
author   | anatofuz
date     | Thu, 13 Feb 2020 11:34:05 +0900
parents  |
children |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc Thu Feb 13 11:34:05 2020 +0900 @@ -0,0 +1,2820 @@ +/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) + Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "memmodel.h" +#include "insn-codes.h" +#include "optabs.h" +#include "recog.h" +#include "expr.h" +#include "basic-block.h" +#include "function.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "explow.h" +#include "emit-rtl.h" +#include "tree-vector-builder.h" +#include "rtx-vector-builder.h" +#include "vec-perm-indices.h" +#include "aarch64-sve-builtins.h" +#include "aarch64-sve-builtins-shapes.h" +#include "aarch64-sve-builtins-base.h" +#include "aarch64-sve-builtins-functions.h" + +using namespace aarch64_sve; + +namespace { + +/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */ +static int +unspec_cmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_CMLA; + case 90: return UNSPEC_CMLA90; + case 180: return UNSPEC_CMLA180; + case 270: return UNSPEC_CMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_FCMLA; + case 90: return UNSPEC_FCMLA90; + case 180: return UNSPEC_FCMLA180; + case 270: return UNSPEC_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */ +static int +unspec_cond_fcmla (int rot) +{ + switch (rot) + { + case 0: return UNSPEC_COND_FCMLA; + case 90: return UNSPEC_COND_FCMLA90; + case 180: return UNSPEC_COND_FCMLA180; + case 270: return UNSPEC_COND_FCMLA270; + default: gcc_unreachable (); + } +} + +/* Expand a call to svmad, or svmla after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. */ +static rtx +expand_mad (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +/* Expand a call to svmla_lane or svmls_lane using floating-point unspec + UNSPEC. */ +static rtx +expand_mla_mls_lane (function_expander &e, int unspec) +{ + /* Put the operands in the normal (fma ...) order, with the accumulator + last. This fits naturally since that's also the unprinted operand + in the asm output. 
*/ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); +} + +/* Expand a call to svmsb, or svmls after reordering its operands. + Make _m forms merge with argument MERGE_ARGNO. */ +static rtx +expand_msb (function_expander &e, + unsigned int merge_argno = DEFAULT_MERGE_ARGNO) +{ + if (e.pred == PRED_x) + { + insn_code icode; + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_fnma (e.vector_mode (0)); + else + icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + insn_code icode = e.direct_optab_handler (cond_fnma_optab); + return e.use_cond_insn (icode, merge_argno); +} + +class svabd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The integer operations are represented as the subtraction of the + minimum from the maximum, with the signedness of the instruction + keyed off the signedness of the maximum operation. */ + rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX; + insn_code icode; + if (e.pred == PRED_x) + { + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_pred_abd (e.vector_mode (0)); + return e.use_pred_x_insn (icode); + } + + if (e.type_suffix (0).integer_p) + icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0)); + else + icode = code_for_aarch64_cond_abd (e.vector_mode (0)); + return e.use_cond_insn (icode); + } +}; + +/* Implements svacge, svacgt, svacle and svaclt. */ +class svac_impl : public function_base +{ +public: + CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The unspec code for the underlying comparison. */ + int m_unspec; +}; + +class svadda_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the predicate last, as required by mask_fold_left_plus_optab. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements svadr[bhwd]. */ +class svadr_bhwd_impl : public function_base +{ +public: + CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + if (m_shift == 0) + return e.use_exact_insn (code_for_aarch64_adr (mode)); + + /* Turn the access size into an extra shift argument. */ + rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode)); + e.args.quick_push (expand_vector_broadcast (mode, shift)); + return e.use_exact_insn (code_for_aarch64_adr_shift (mode)); + } + + /* How many bits left to shift the vector displacement. */ + unsigned int m_shift; +}; + +class svbic_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert svbic of a constant into svand of its inverse. 
*/ + if (CONST_INT_P (e.args[2])) + { + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); + return e.map_to_rtx_codes (AND, AND, -1); + } + + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z); + } + + if (e.pred == PRED_x) + return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0))); + + return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0))); + } +}; + +/* Implements svbrkn, svbrkpa and svbrkpb. */ +class svbrk_binary_impl : public function_base +{ +public: + CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +/* Implements svbrka and svbrkb. */ +class svbrk_unary_impl : public function_base +{ +public: + CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_cond_insn (code_for_aarch64_brk (m_unspec)); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcadd_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (rot == 90) + return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90, + UNSPEC_COND_FCADD90); + if (rot == 270) + return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270, + UNSPEC_COND_FCADD270); + gcc_unreachable (); + } +}; + +/* Implements svclasta and svclastb. */ +class svclast_impl : public quiet<function_base> +{ +public: + CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Match the fold_extract_optab order. */ + std::swap (e.args[0], e.args[1]); + machine_mode mode = e.vector_mode (0); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + icode = code_for_fold_extract (m_unspec, mode); + else + icode = code_for_aarch64_fold_extract_vector (m_unspec, mode); + return e.use_exact_insn (icode); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svcmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. */ + e.rotate_inputs_left (1, 4); + return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3); + } + else + { + int cmla = unspec_cmla (rot); + return e.map_to_unspecs (cmla, cmla, -1); + } + } +}; + +class svcmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Convert the rotation amount into a specific unspec. */ + int rot = INTVAL (e.args.pop ()); + machine_mode mode = e.vector_mode (0); + if (e.type_suffix (0).float_p) + { + /* Make the operand order the same as the one used by the fma optabs, + with the accumulator last. 
*/ + e.rotate_inputs_left (0, 4); + insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode); + return e.use_exact_insn (icode); + } + else + { + insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode); + return e.use_exact_insn (icode); + } + } +}; + +/* Implements svcmp<cc> (except svcmpuo, which is handled separately). */ +class svcmp_impl : public function_base +{ +public: + CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) + : m_code (code), m_unspec_for_fp (unspec_for_fp) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pg = gimple_call_arg (f.call, 0); + tree rhs1 = gimple_call_arg (f.call, 1); + tree rhs2 = gimple_call_arg (f.call, 2); + + /* Convert a ptrue-predicated integer comparison into the corresponding + gimple-level operation. */ + if (integer_all_onesp (pg) + && f.type_suffix (0).element_bytes == 1 + && f.type_suffix (0).integer_p) + { + gimple_seq stmts = NULL; + rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, m_code, rhs1, rhs2); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + if (e.type_suffix (0).integer_p) + { + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode)); + } + + insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode); + return e.use_exact_insn (icode); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec code to use for floating-point comparisons. */ + int m_unspec_for_fp; +}; + +/* Implements svcmp<cc>_wide. */ +class svcmp_wide_impl : public function_base +{ +public: + CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint, + int unspec_for_uint) + : m_code (code), m_unspec_for_sint (unspec_for_sint), + m_unspec_for_uint (unspec_for_uint) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + bool unsigned_p = e.type_suffix (0).unsigned_p; + rtx_code code = get_rtx_code (m_code, unsigned_p); + + /* Comparisons are UNSPEC_PRED_Z operations and so need a hint + operand. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + + /* If the argument is a constant that the unwidened comparisons + can handle directly, use them instead. */ + insn_code icode = code_for_aarch64_pred_cmp (code, mode); + rtx op2 = unwrap_const_vec_duplicate (e.args[3]); + if (CONSTANT_P (op2) + && insn_data[icode].operand[4].predicate (op2, DImode)) + { + e.args[3] = op2; + return e.use_exact_insn (icode); + } + + int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); + return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode)); + } + + /* The tree code associated with the comparison. */ + tree_code m_code; + + /* The unspec codes for signed and unsigned wide comparisons + respectively. 
*/ + int m_unspec_for_sint; + int m_unspec_for_uint; +}; + +class svcmpuo_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0))); + } +}; + +class svcnot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + if (e.pred == PRED_x) + { + /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs + a ptrue hint. */ + e.add_ptrue_hint (0, e.gp_mode (0)); + return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); + } + + return e.use_cond_insn (code_for_cond_cnot (mode), 0); + } +}; + +/* Implements svcnt[bhwd], which count the number of elements + in a particular vector mode. */ +class svcnt_bhwd_impl : public function_base +{ +public: + CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), + GET_MODE_NUNITS (m_ref_mode)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode); + } + + /* The mode of the vector associated with the [bhwd] suffix. */ + machine_mode m_ref_mode; +}; + +/* Implements svcnt[bhwd]_pat. */ +class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl +{ +public: + CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode) + : svcnt_bhwd_impl (ref_mode) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svcvnt[bwhd]_pat (SV_ALL) == svcnt[bwhd] (). */ + return svcnt_bhwd_impl::fold (f); + + /* See whether we can count the number of elements in the pattern + at compile time. */ + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) + { + tree count = build_int_cstu (TREE_TYPE (f.lhs), value); + return gimple_build_assign (f.lhs, count); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + e.args.quick_push (gen_int_mode (elements_per_vq, DImode)); + e.args.quick_push (const1_rtx); + return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat); + } +}; + +class svcntp_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); + } +}; + +/* Implements svcreate2, svcreate3 and svcreate4. */ +class svcreate_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + unsigned int nargs = gimple_call_num_args (f.call); + tree lhs_type = TREE_TYPE (f.lhs); + + /* Replace the call with a clobber of the result (to prevent it from + becoming upwards exposed) followed by stores into each individual + vector of tuple. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the final vector store. 
*/ + gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type)); + + for (unsigned int i = nargs; i-- > 0; ) + { + tree rhs_vector = gimple_call_arg (f.call, i); + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + unshare_expr (f.lhs), field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, size_int (i), + NULL_TREE, NULL_TREE); + gassign *assign = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, assign, GSI_SAME_STMT); + } + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + + /* Record that LHS_TUPLE is dead before the first store. */ + emit_clobber (lhs_tuple); + for (unsigned int i = 0; i < e.args.length (); ++i) + { + /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]), + lhs_tuple, GET_MODE (lhs_tuple), + i * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, e.args[i]); + } + return lhs_tuple; + } +}; + +class svcvt_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode0 = e.vector_mode (0); + machine_mode mode1 = e.vector_mode (1); + insn_code icode; + /* All this complication comes from the need to select four things + simultaneously: + + (1) the kind of conversion (int<-float, float<-int, float<-float) + (2) signed vs. unsigned integers, where relevant + (3) the predication mode, which must be the wider of the predication + modes for MODE0 and MODE1 + (4) the predication type (m, x or z) + + The only supported int<->float conversions for which the integer is + narrower than the float are SI<->DF. It's therefore more convenient + to handle (3) by defining two patterns for int<->float conversions: + one in which the integer is at least as wide as the float and so + determines the predication mode, and another single SI<->DF pattern + in which the float's mode determines the predication mode (which is + always VNx2BI in that case). + + The names of the patterns follow the optab convention of giving + the source mode before the destination mode. */ + if (e.type_suffix (1).integer_p) + { + int unspec = (e.type_suffix (1).unsigned_p + ? UNSPEC_COND_UCVTF + : UNSPEC_COND_SCVTF); + if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0) + : code_for_cond_nonextend (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_extend (unspec, mode1, mode0) + : code_for_cond_extend (unspec, mode1, mode0)); + } + else + { + int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT + : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU + : UNSPEC_COND_FCVTZS); + if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes) + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0) + : code_for_cond_nontrunc (unspec, mode1, mode0)); + else + icode = (e.pred == PRED_x + ? code_for_aarch64_sve_trunc (unspec, mode1, mode0) + : code_for_cond_trunc (unspec, mode1, mode0)); + } + + if (e.pred == PRED_x) + return e.use_pred_x_insn (icode); + return e.use_cond_insn (icode); + } +}; + +class svdot_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* In the optab, the multiplication operands come before the accumulator + operand. 
The optab is keyed off the multiplication mode. */ + e.rotate_inputs_left (0, 3); + insn_code icode + = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, + 0, GET_MODE (e.args[0])); + return e.use_unpred_insn (icode); + } +}; + +class svdotprod_lane_impl : public unspec_based_function_base +{ +public: + CONSTEXPR svdotprod_lane_impl (int unspec_for_sint, + int unspec_for_uint, + int unspec_for_float) + : unspec_based_function_base (unspec_for_sint, + unspec_for_uint, + unspec_for_float) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Use the same ordering as the dot_prod_optab, with the + accumulator last. */ + e.rotate_inputs_left (0, 4); + int unspec = unspec_for (e); + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); + } +}; + +class svdup_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1); + + if (f.pred == PRED_none || f.pred == PRED_x) + { + if (CONSTANT_CLASS_P (rhs)) + { + if (f.type_suffix (0).bool_p) + return (tree_to_shwi (rhs) + ? f.fold_to_ptrue () + : f.fold_to_pfalse ()); + + tree rhs_vector = build_vector_from_val (vec_type, rhs); + return gimple_build_assign (f.lhs, rhs_vector); + } + + /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we + would need to introduce an extra and unwanted conversion to + the truth vector element type. */ + if (!f.type_suffix (0).bool_p) + return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); + } + + /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */ + if (f.pred == PRED_z) + { + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vec_type, 0); + rhs = f.force_vector (stmts, vec_type, rhs); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs, + build_zero_cst (vec_type)); + } + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.pred == PRED_none || e.pred == PRED_x) + /* There's no benefit to using predicated instructions for _x here. */ + return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); + + /* Model predicated svdups as a SEL in which the "true" value is + the duplicate of the function argument and the "false" value + is the value of inactive lanes. */ + insn_code icode; + machine_mode mode = e.vector_mode (0); + if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) + /* Duplicate the constant to fill a vector. The pattern optimizes + various cases involving constant operands, falling back to SEL + if necessary. */ + icode = code_for_vcond_mask (mode, mode); + else + /* Use the pattern for selecting between a duplicated scalar + variable and a vector fallback. */ + icode = code_for_aarch64_sel_dup (mode); + return e.use_vcond_mask_insn (icode); + } +}; + +class svdup_lane_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* The native DUP lane has an index range of 64 bytes. */ + machine_mode mode = e.vector_mode (0); + if (CONST_INT_P (e.args[1]) + && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63)) + return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode)); + + /* Treat svdup_lane as if it were svtbl_n. 
*/ + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +class svdupq_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vec_type = TREE_TYPE (f.lhs); + unsigned int nargs = gimple_call_num_args (f.call); + /* For predicates, pad out each argument so that we have one element + per bit. */ + unsigned int factor = (f.type_suffix (0).bool_p + ? f.type_suffix (0).element_bytes : 1); + tree_vector_builder builder (vec_type, nargs * factor, 1); + for (unsigned int i = 0; i < nargs; ++i) + { + tree elt = gimple_call_arg (f.call, i); + if (!CONSTANT_CLASS_P (elt)) + return NULL; + builder.quick_push (elt); + for (unsigned int j = 1; j < factor; ++j) + builder.quick_push (build_zero_cst (TREE_TYPE (vec_type))); + } + return gimple_build_assign (f.lhs, builder.build ()); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + unsigned int elements_per_vq = e.args.length (); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Construct a vector of integers so that we can compare them against + zero below. Zero vs. nonzero is the only distinction that + matters. */ + mode = aarch64_sve_int_mode (mode); + for (unsigned int i = 0; i < elements_per_vq; ++i) + e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode), + e.args[i], QImode); + } + + /* Get the 128-bit Advanced SIMD vector for this data size. */ + scalar_mode element_mode = GET_MODE_INNER (mode); + machine_mode vq_mode = aarch64_vq_mode (element_mode).require (); + gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode))); + + /* Put the arguments into a 128-bit Advanced SIMD vector. We want + argument N to go into architectural lane N, whereas Advanced SIMD + vectors are loaded memory lsb to register lsb. We therefore need + to reverse the elements for big-endian targets. */ + rtx vq_reg = gen_reg_rtx (vq_mode); + rtvec vec = rtvec_alloc (elements_per_vq); + for (unsigned int i = 0; i < elements_per_vq; ++i) + { + unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i; + RTVEC_ELT (vec, i) = e.args[argno]; + } + aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec)); + + /* If the result is a boolean, compare the data vector against zero. */ + if (mode != e.vector_mode (0)) + { + rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg); + return aarch64_convert_sve_data_to_pred (e.possible_target, + e.vector_mode (0), data_dupq); + } + + return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg); + } +}; + +class svdupq_lane_impl : public quiet<function_base> +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + rtx index = e.args[1]; + if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3)) + { + /* Use the .Q form of DUP, which is the native instruction for + this function. */ + insn_code icode = code_for_aarch64_sve_dupq_lane (mode); + unsigned int num_indices = e.elements_per_vq (0); + rtx indices = aarch64_gen_stepped_int_parallel + (num_indices, INTVAL (index) * num_indices, 1); + + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + e.add_fixed_operand (indices); + return e.generate_insn (icode); + } + + /* Build a .D TBL index for the pairs of doublewords that we want to + duplicate. */ + if (CONST_INT_P (index)) + { + /* The index vector is a constant. 
*/ + rtx_vector_builder builder (VNx2DImode, 2, 1); + builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode)); + builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode)); + index = builder.build (); + } + else + { + /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec + explicitly allows the top of the index to be dropped. */ + index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode, + index, const1_rtx)); + index = expand_vector_broadcast (VNx2DImode, index); + + /* Get an alternating 0, 1 predicate. */ + rtx_vector_builder builder (VNx2BImode, 2, 1); + builder.quick_push (const0_rtx); + builder.quick_push (constm1_rtx); + rtx pg = force_reg (VNx2BImode, builder.build ()); + + /* Add one to the odd elements of the index. */ + rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode)); + rtx target = gen_reg_rtx (VNx2DImode); + emit_insn (gen_cond_addvnx2di (target, pg, index, one, index)); + index = target; + } + + e.args[0] = gen_lowpart (VNx2DImode, e.args[0]); + e.args[1] = index; + return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di); + } +}; + +/* Implements svextb, svexth and svextw. */ +class svext_bhw_impl : public function_base +{ +public: + CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode) + : m_from_mode (from_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).unsigned_p) + { + /* Convert to an AND. The widest we go is 0xffffffff, which fits + in a CONST_INT. */ + e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode))); + if (e.pred == PRED_m) + /* We now have arguments "(inactive, pg, op, mask)". Convert this + to "(pg, op, mask, inactive)" so that the order matches svand_m + with an extra argument on the end. Take the inactive elements + from this extra argument. */ + e.rotate_inputs_left (0, 4); + return e.map_to_rtx_codes (AND, AND, -1, 3); + } + + machine_mode wide_mode = e.vector_mode (0); + poly_uint64 nunits = GET_MODE_NUNITS (wide_mode); + machine_mode narrow_mode + = aarch64_sve_data_mode (m_from_mode, nunits).require (); + if (e.pred == PRED_x) + { + insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode); + return e.use_pred_x_insn (icode); + } + + insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode); + return e.use_cond_insn (icode); + } + + /* The element mode that we're extending from. */ + scalar_int_mode m_from_mode; +}; + +/* Implements svget2, svget3 and svget4. */ +class svget_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svget_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Fold into a normal gimple component access. */ + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + rhs_tuple, field, NULL_TREE); + tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs), + rhs_array, index, NULL_TREE, NULL_TREE); + return gimple_build_assign (f.lhs, rhs_vector); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Fold the access into a subreg rvalue. 
*/ + return simplify_gen_subreg (e.vector_mode (0), e.args[0], + GET_MODE (e.args[0]), + INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR); + } +}; + +class svindex_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (e.direct_optab_handler (vec_series_optab)); + } +}; + +class svinsr_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = direct_optab_handler (vec_shl_insert_optab, + e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svlasta and svlastb. */ +class svlast_impl : public quiet<function_base> +{ +public: + CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0))); + } + + /* The unspec code associated with the operation. */ + int m_unspec; +}; + +class svld1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, f.lhs); + return new_call; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskload_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements extending contiguous forms of svld1. */ +class svld1_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_gather_load_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements extending forms of svld1_gather. 
*/ +class svld1_gather_extend_impl : public extending_load +{ +public: + CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the extending gathers use the same + operand order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class load_replicate : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + tree + memory_scalar_type (const function_instance &fi) const OVERRIDE + { + return fi.scalar_type (0); + } +}; + +class svld1rq_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &fi) const OVERRIDE + { + return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svld1ro_impl : public load_replicate +{ +public: + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return OImode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svld2, svld3 and svld4. */ +class svld234_impl : public full_width_access +{ +public: + CONSTEXPR svld234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree tuple_type = TREE_TYPE (f.lhs); + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* Emit two statements: a clobber of the lhs, so that it isn't + upwards exposed, and then the load itself. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the clobber statement + rather than the load. */ + gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type)); + + /* View the loaded data as an array of vectors. */ + tree field = tuple_type_field (tuple_type); + tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), + unshare_expr (f.lhs)); + + /* Emit the load itself. 
*/ + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, + base, cookie, pred); + gimple_call_set_lhs (new_call, lhs_array); + gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT); + + return clobber; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); + insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +class svldff1_gather_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + machine_mode mem_mode = e.memory_vector_mode (); + return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode)); + } +}; + +/* Implements extending forms of svldff1_gather. */ +class svldff1_gather_extend : public extending_load +{ +public: + CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type) + : extending_load (memory_type) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + e.prepare_gather_address_operands (1); + /* Put the predicate last, since ldff1_gather uses the same operand + order as mask_gather_load_optab. */ + e.rotate_inputs_left (0, 5); + /* Add a constant predicate for the extension rtx. */ + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_exact_insn (icode); + } +}; + +class svldnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); + return e.use_contiguous_load_insn (icode); + } +}; + +/* Implements svldff1 and svldnf1. */ +class svldxf1_impl : public full_width_access +{ +public: + CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + machine_mode mode = e.vector_mode (0); + return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode)); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +/* Implements extending contiguous forms of svldff1 and svldnf1. 
*/ +class svldxf1_extend_impl : public extending_load +{ +public: + CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec) + : extending_load (memory_type), m_unspec (unspec) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_update_ffr_for_load ()); + + insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (), + e.vector_mode (0), + e.memory_vector_mode ()); + return e.use_contiguous_load_insn (icode); + } + + /* The unspec associated with the load. */ + int m_unspec; +}; + +class svlen_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* The argument only exists for its type. */ + tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0)); + tree count = build_int_cstu (TREE_TYPE (f.lhs), + TYPE_VECTOR_SUBPARTS (rhs_type)); + return gimple_build_assign (f.lhs, count); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The argument only exists for its type. */ + return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode); + } +}; + +class svmad_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_mad (e); + } +}; + +class svmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_mad (e, 3); + } +}; + +class svmla_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLA); + } +}; + +class svmls_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + /* Put the accumulator at the end (argument 3), but keep it as the + merge input for _m functions. */ + e.rotate_inputs_left (1, 4); + return expand_msb (e, 3); + } +}; + +class svmov_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return gimple_build_assign (f.lhs, BIT_AND_EXPR, + gimple_call_arg (f.call, 0), + gimple_call_arg (f.call, 1)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B" + is "AND Pa.B, Pb/Z, Pc.B, Pc.B". 
*/ + gcc_assert (e.pred == PRED_z); + e.args.quick_push (e.args[1]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z); + } +}; + +class svmls_lane_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix (0).integer_p) + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode)); + } + return expand_mla_mls_lane (e, UNSPEC_FMLS); + } +}; + +class svmmla_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode; + if (e.type_suffix (0).integer_p) + { + if (e.type_suffix (0).unsigned_p) + icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0)); + else + icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); + } + else + icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +class svmsb_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return expand_msb (e); + } +}; + +class svnand_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z); + } +}; + +class svnor_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z); + } +}; + +class svnot_impl : public rtx_code_function +{ +public: + CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) + { + /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B" + is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */ + gcc_assert (e.pred == PRED_z); + e.args.quick_insert (1, e.args[0]); + return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z); + } + return rtx_code_function::expand (e); + } +}; + +class svorn_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + gcc_assert (e.pred == PRED_z); + return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z); + } +}; + +class svpfalse_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_pfalse (); + } + + rtx + expand (function_expander &) const OVERRIDE + { + return CONST0_RTX (VNx16BImode); + } +}; + +/* Implements svpfirst and svpnext, which share the same .md patterns. */ +class svpfirst_svpnext_impl : public function_base +{ +public: + CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + e.add_ptrue_hint (0, mode); + return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode)); + } + + /* The unspec associated with the operation. */ + int m_unspec; +}; + +/* Implements contiguous forms of svprf[bhwd]. 
*/ +class svprf_bhwd_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + insn_code icode = code_for_aarch64_sve_prefetch (m_mode); + return e.use_contiguous_prefetch_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svprf[bhwd]_gather. */ +class svprf_bhwd_gather_impl : public function_base +{ +public: + CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_PREFETCH_MEMORY; + } + + machine_mode + memory_vector_mode (const function_instance &) const OVERRIDE + { + return m_mode; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_prefetch_operands (); + e.prepare_gather_address_operands (1); + + /* Insert a zero operand to identify the mode of the memory being + accessed. This goes between the gather operands and prefetch + operands created above. */ + e.args.quick_insert (5, CONST0_RTX (m_mode)); + + machine_mode reg_mode = GET_MODE (e.args[2]); + insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode); + return e.use_exact_insn (icode); + } + + /* The mode that we'd use to hold one vector of prefetched data. */ + machine_mode m_mode; +}; + +/* Implements svptest_any, svptest_first and svptest_last. */ +class svptest_impl : public function_base +{ +public: + CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See whether GP is an exact ptrue for some predicate mode; + i.e. whether converting the GP to that mode will not drop + set bits and will leave all significant bits set. */ + machine_mode wide_mode; + int hint; + if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode)) + hint = SVE_KNOWN_PTRUE; + else + { + hint = SVE_MAYBE_NOT_PTRUE; + wide_mode = VNx16BImode; + } + + /* Generate the PTEST itself. */ + rtx pg = force_reg (VNx16BImode, e.args[0]); + rtx wide_pg = gen_lowpart (wide_mode, pg); + rtx hint_rtx = gen_int_mode (hint, DImode); + rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1])); + emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op)); + + /* Get the location of the boolean result. We can provide SImode and + DImode values directly; rely on generic code to convert others. */ + rtx target = e.possible_target; + if (!target + || !REG_P (target) + || (GET_MODE (target) != SImode && GET_MODE (target) != DImode)) + target = gen_reg_rtx (DImode); + + /* Generate a CSET to convert the CC result of the PTEST to a boolean. */ + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target), + cc_reg, const0_rtx); + emit_insn (gen_rtx_SET (target, compare)); + return target; + } + + /* The comparison code associated with ptest condition. 
*/ + rtx_code m_compare; +}; + +class svptrue_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + return f.fold_to_ptrue (); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return aarch64_ptrue_all (e.type_suffix (0).element_bytes); + } +}; + +class svptrue_pat_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree pattern_arg = gimple_call_arg (f.call, 0); + aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); + + if (pattern == AARCH64_SV_ALL) + /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */ + return f.fold_to_ptrue (); + + /* See whether we can count the number of elements in the pattern + at compile time. If so, construct a predicate with that number + of 1s followed by all 0s. */ + int nelts_per_vq = f.elements_per_vq (0); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq); + if (value >= 0) + return f.fold_to_vl_pred (value); + + return NULL; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* In rtl, the predicate is represented as the constant: + + (const:V16BI (unspec:V16BI [(const_int PATTERN) + (const_vector:VnnBI [zeros])] + UNSPEC_PTRUE)) + + where nn determines the element size. */ + rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0))); + return gen_rtx_CONST (VNx16BImode, + gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE)); + } +}; + +/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */ +class svqdec_svqinc_bhwd_impl : public function_base +{ +public: + CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint, + rtx_code code_for_uint, + scalar_int_mode elem_mode) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint), + m_elem_mode (elem_mode) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Treat non-_pat functions in the same way as _pat functions with + an SV_ALL argument. */ + if (e.args.length () == 2) + e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode)); + + /* Insert the number of elements per 128-bit block as a fake argument, + between the pattern and the multiplier. Arguments 1, 2 and 3 then + correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see + aarch64_sve_cnt_pat for details. */ + unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode); + e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode)); + + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + + /* Choose between operating on integer scalars or integer vectors. */ + machine_mode mode = e.vector_mode (0); + if (e.mode_suffix_id == MODE_n) + mode = GET_MODE_INNER (mode); + return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode)); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; + + /* The integer mode associated with the [bhwd] suffix. */ + scalar_int_mode m_elem_mode; +}; + +/* Implements svqdec[bhwd]{,_pat}. */ +class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {} +}; + +/* Implements svqinc[bhwd]{,_pat}. */ +class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl +{ +public: + CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode) + : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {} +}; + +/* Implements svqdecp and svqincp. 
*/ +class svqdecp_svqincp_impl : public function_base +{ +public: + CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint, + rtx_code code_for_uint) + : m_code_for_sint (code_for_sint), + m_code_for_uint (code_for_uint) + {} + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx_code code = (e.type_suffix (0).unsigned_p + ? m_code_for_uint + : m_code_for_sint); + insn_code icode; + if (e.mode_suffix_id == MODE_n) + { + /* Increment or decrement a scalar (whose mode is given by the first + type suffix) by the number of active elements in a predicate + (whose mode is given by the second type suffix). */ + machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); + icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1)); + } + else + /* Increment a vector by the number of active elements in a predicate, + with the vector mode determining the predicate mode. */ + icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0)); + return e.use_exact_insn (icode); + } + + /* The saturating addition or subtraction codes to use for signed and + unsigned values respectively. */ + rtx_code m_code_for_sint; + rtx_code m_code_for_uint; +}; + +class svrdffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_READ_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* See the block comment in aarch64-sve.md for details about the + FFR handling. */ + emit_insn (gen_aarch64_copy_ffr_to_ffrt ()); + rtx result = e.use_exact_insn (e.pred == PRED_z + ? CODE_FOR_aarch64_rdffr_z + : CODE_FOR_aarch64_rdffr); + emit_insn (gen_aarch64_update_ffrt ()); + return result; + } +}; + +class svreinterpret_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. */ + if (!targetm.can_change_mode_class (f.vector_mode (0), + f.vector_mode (1), FP_REGS)) + return NULL; + + /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR + reinterpretation. */ + tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs), + gimple_call_arg (f.call, 0)); + return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } +}; + +class svrev_impl : public permute +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Punt for now on _b16 and wider; we'd need more complex evpc logic + to rerecognize the result. */ + if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) + return NULL; + + /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ + poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (int i = 0; i < 3; ++i) + builder.quick_push (nelts - i - 1); + return fold_permute (f, builder); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); + } +}; + +class svsel_impl : public quiet<function_base> +{ +public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svsel corresponds exactly to VEC_COND_EXPR. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, f.vector_type (0), 0); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, + gimple_call_arg (f.call, 1), + gimple_call_arg (f.call, 2)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ + e.rotate_inputs_left (0, 3); + insn_code icode = convert_optab_handler (vcond_mask_optab, + e.vector_mode (0), + e.gp_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements svset2, svset3 and svset4. */ +class svset_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svset_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree rhs_tuple = gimple_call_arg (f.call, 0); + tree index = gimple_call_arg (f.call, 1); + tree rhs_vector = gimple_call_arg (f.call, 2); + + /* Replace the call with two statements: a copy of the full tuple + to the call result, followed by an update of the individual vector. + + The fold routines expect the replacement statement to have the + same lhs as the original call, so return the copy statement + rather than the field update. */ + gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); + + /* Get a reference to the individual vector. */ + tree field = tuple_type_field (TREE_TYPE (f.lhs)); + tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), + f.lhs, field, NULL_TREE); + tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), + lhs_array, index, NULL_TREE, NULL_TREE); + gassign *update = gimple_build_assign (lhs_vector, rhs_vector); + gsi_insert_after (f.gsi, update, GSI_SAME_STMT); + + return copy; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx rhs_tuple = e.args[0]; + unsigned int index = INTVAL (e.args[1]); + rtx rhs_vector = e.args[2]; + + /* First copy the full tuple to the target register. */ + rtx lhs_tuple = e.get_nonoverlapping_reg_target (); + emit_move_insn (lhs_tuple, rhs_tuple); + + /* ...then update the individual vector. */ + rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), + lhs_tuple, GET_MODE (lhs_tuple), + index * BYTES_PER_SVE_VECTOR); + emit_move_insn (lhs_vector, rhs_vector); + return lhs_vector; + } +}; + +class svsetffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.args.quick_push (CONSTM1_RTX (VNx16BImode)); + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +class svst1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. 
*/ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); + return gimple_build_call_internal (IFN_MASK_STORE, 4, + base, cookie, pred, rhs); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = convert_optab_handler (maskstore_optab, + e.vector_mode (0), e.gp_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svst1_scatter_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, as required by mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_scatter_store_optab, + mem_mode, int_mode); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating forms of svst1_scatter. */ +class svst1_scatter_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + e.prepare_gather_address_operands (1); + /* Put the predicate last, since the truncating scatters use the same + operand order as mask_scatter_store_optab. */ + e.rotate_inputs_left (0, 6); + insn_code icode = code_for_aarch64_scatter_store_trunc + (e.memory_vector_mode (), e.vector_mode (0)); + return e.use_exact_insn (icode); + } +}; + +/* Implements truncating contiguous forms of svst1. */ +class svst1_truncate_impl : public truncating_store +{ +public: + CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) + : truncating_store (to_mode) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), + e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +/* Implements svst2, svst3 and svst4. */ +class svst234_impl : public full_width_access +{ +public: + CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) + : full_width_access (vectors_per_tuple) {} + + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree vectype = f.vector_type (0); + + /* Get the predicate and base pointer. */ + gimple_seq stmts = NULL; + tree pred = f.convert_pred (stmts, vectype, 0); + tree base = f.fold_contiguous_base (stmts, vectype); + gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); + + /* View the stored data as an array of vectors. 
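+ For example (illustrative), an svfloat32x2_t argument is viewed as + its underlying two-vector array, which is the operand form that the + IFN_MASK_STORE_LANES call built below expects.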
*/ + unsigned int num_args = gimple_call_num_args (f.call); + tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); + tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); + tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); + + tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); + return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, + base, cookie, pred, rhs_array); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode tuple_mode = GET_MODE (e.args.last ()); + insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, + tuple_mode, e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svstnt1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); + return e.use_contiguous_store_insn (icode); + } +}; + +class svsub_impl : public rtx_code_function +{ +public: + CONSTEXPR svsub_impl () + : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* Canonicalize subtractions of constants to additions. */ + machine_mode mode = e.vector_mode (0); + if (e.try_negating_argument (2, mode)) + return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); + + return rtx_code_function::expand (e); + } +}; + +class svtbl_impl : public permute +{ +public: + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); + } +}; + +/* Implements svtrn1 and svtrn2. */ +class svtrn_impl : public binary_permute +{ +public: + CONSTEXPR svtrn_impl (int base) + : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } + svtrn2: as for svtrn1, but with 1 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (m_base + i * 2); + builder.quick_push (m_base + i * 2 + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svtrn1, 1 for svtrn2. */ + unsigned int m_base; +}; + +/* Base class for svundef{,2,3,4}. */ +class svundef_impl : public quiet<multi_vector_function> +{ +public: + CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) + : quiet<multi_vector_function> (vectors_per_tuple) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Don't fold svundef at the gimple level. There's no exact + correspondence for SSA_NAMEs, and we explicitly don't want + to generate a specific value (like an all-zeros vector). */ + if (vectors_per_tuple () == 1) + return NULL; + return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs))); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + rtx target = e.get_reg_target (); + emit_clobber (copy_rtx (target)); + return target; + } +}; + +/* Implements svunpklo and svunpkhi. */ +class svunpk_impl : public quiet<function_base> +{ +public: + CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* Don't fold the predicate ops, since every bit of the svbool_t + result is significant. 
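+ Only the data forms are handled here; for example (illustrative), + svunpklo on an svint16_t argument becomes a VEC_UNPACK_*_EXPR below, + whereas svbool_t arguments are left to the expander, which uses the + PUNPKHI/PUNPKLO patterns instead.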
*/ + if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) + return NULL; + + /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian + and VEC_UNPACK_HI_EXPR for big-endian. */ + bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; + tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; + return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); + } + + rtx + expand (function_expander &e) const OVERRIDE + { + machine_mode mode = GET_MODE (e.args[0]); + unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; + unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; + insn_code icode; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + icode = code_for_aarch64_sve_punpk (unpacku, mode); + else + { + int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; + icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); + } + return e.use_exact_insn (icode); + } + + /* True for svunpkhi, false for svunpklo. */ + bool m_high_p; +}; + +/* Also implements svsudot. */ +class svusdot_impl : public function_base +{ +public: + CONSTEXPR svusdot_impl (bool su) : m_su (su) {} + + rtx + expand (function_expander &e) const OVERRIDE + { + /* The implementation of the ACLE function svsudot (for the non-lane + version) is through the USDOT instruction but with the second and third + inputs swapped. */ + if (m_su) + e.rotate_inputs_left (1, 2); + /* The ACLE function has the same order requirements as for svdot. + While there's no requirement for the RTL pattern to have the same sort + of order as that for <sur>dot_prod, it's easier to read. + Hence we do the same rotation on arguments as svdot_impl does. */ + e.rotate_inputs_left (0, 3); + machine_mode mode = e.vector_mode (0); + insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); + return e.use_exact_insn (icode); + } + +private: + bool m_su; +}; + +/* Implements svuzp1 and svuzp2. */ +class svuzp_impl : public binary_permute +{ +public: + CONSTEXPR svuzp_impl (unsigned int base) + : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svuzp1: { 0, 2, 4, 6, ... } + svuzp2: { 1, 3, 5, 7, ... }. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (nelts, 1, 3); + for (unsigned int i = 0; i < 3; ++i) + builder.quick_push (m_base + i * 2); + return fold_permute (f, builder); + } + + /* 0 for svuzp1, 1 for svuzp2. */ + unsigned int m_base; +}; + +/* A function_base for svwhilele and svwhilelt functions. */ +class svwhilelx_impl : public while_comparison +{ +public: + CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) + : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p) + {} + + /* Try to fold a call by treating its arguments as constants of type T. */ + template<typename T> + gimple * + fold_type (gimple_folder &f) const + { + /* Only handle cases in which both operands are constant. */ + T arg0, arg1; + if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) + || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) + return NULL; + + /* Check whether the result is known to be all-false. */ + if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) + return f.fold_to_pfalse (); + + /* Punt if we can't tell at compile time whether the result + is all-false. */ + if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) + return NULL; + + /* At this point we know the result has at least one set element. 
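+ As an illustrative case, svwhilelt_b32 (0, 3) gives DIFF == 3, which + is known to fit within the predicate, so the code below folds the + call to a constant predicate in which exactly the first three + elements are set.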
*/ + poly_uint64 diff = arg1 - arg0; + poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); + + /* Canonicalize the svwhilele form to the svwhilelt form. Subtract + from NELTS rather than adding to DIFF, to prevent overflow. */ + if (m_eq_p) + nelts -= 1; + + /* Check whether the result is known to be all-true. */ + if (known_ge (diff, nelts)) + return f.fold_to_ptrue (); + + /* Punt if DIFF might not be the actual number of set elements + in the result. Conditional equality is fine. */ + if (maybe_gt (diff, nelts)) + return NULL; + + /* At this point we know that the predicate will have DIFF set elements + for svwhilelt and DIFF + 1 set elements for svwhilele (which stops + after rather than before ARG1 is reached). See if we can create + the predicate at compile time. */ + unsigned HOST_WIDE_INT vl; + if (diff.is_constant (&vl)) + /* Overflow is no longer possible after the checks above. */ + return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); + + return NULL; + } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + if (f.type_suffix (1).unsigned_p) + return fold_type<poly_uint64> (f); + else + return fold_type<poly_int64> (f); + } + + /* True for svwhilele, false for svwhilelt. */ + bool m_eq_p; +}; + +class svwrffr_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const OVERRIDE + { + return CP_WRITE_FFR; + } + + rtx + expand (function_expander &e) const OVERRIDE + { + return e.use_exact_insn (CODE_FOR_aarch64_wrffr); + } +}; + +/* Implements svzip1 and svzip2. */ +class svzip_impl : public binary_permute +{ +public: + CONSTEXPR svzip_impl (unsigned int base) + : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } + svzip2: as for svzip1, but with nelts / 2 added to each index. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + poly_uint64 base = m_base * exact_div (nelts, 2); + vec_perm_builder builder (nelts, 2, 3); + for (unsigned int i = 0; i < 3; ++i) + { + builder.quick_push (base + i); + builder.quick_push (base + i + nelts); + } + return fold_permute (f, builder); + } + + /* 0 for svzip1, 1 for svzip2.
*/ + unsigned int m_base; +}; + +} /* end anonymous namespace */ + +namespace aarch64_sve { + +FUNCTION (svabd, svabd_impl,) +FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS)) +FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) +FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) +FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) +FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) +FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) +FUNCTION (svadda, svadda_impl,) +FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) +FUNCTION (svadrb, svadr_bhwd_impl, (0)) +FUNCTION (svadrd, svadr_bhwd_impl, (3)) +FUNCTION (svadrh, svadr_bhwd_impl, (1)) +FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svand, rtx_code_function, (AND, AND)) +FUNCTION (svandv, reduction, (UNSPEC_ANDV)) +FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) +FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) +FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1)) +FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) +FUNCTION (svbfdot_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) +FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) +FUNCTION (svbfmlalb_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) +FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) +FUNCTION (svbfmlalt_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) +FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) +FUNCTION (svbic, svbic_impl,) +FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) +FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) +FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) +FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA)) +FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) +FUNCTION (svcadd, svcadd_impl,) +FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) +FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) +FUNCTION (svcls, unary_count, (CLRSB)) +FUNCTION (svclz, unary_count, (CLZ)) +FUNCTION (svcmla, svcmla_impl,) +FUNCTION (svcmla_lane, svcmla_lane_impl,) +FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) +FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, + UNSPEC_COND_CMPEQ_WIDE)) +FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) +FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, + UNSPEC_COND_CMPHS_WIDE)) +FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) +FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, + UNSPEC_COND_CMPHI_WIDE)) +FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) +FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, + UNSPEC_COND_CMPLS_WIDE)) +FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) +FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, + UNSPEC_COND_CMPLO_WIDE)) +FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) +FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, + UNSPEC_COND_CMPNE_WIDE)) +FUNCTION (svcmpuo, svcmpuo_impl,) +FUNCTION (svcnot, svcnot_impl,) +FUNCTION (svcnt, unary_count, (POPCOUNT)) +FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) +FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) +FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) +FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) +FUNCTION (svcnth, 
svcnt_bhwd_impl, (VNx8HImode)) +FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) +FUNCTION (svcntp, svcntp_impl,) +FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) +FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) +FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) +FUNCTION (svcreate2, svcreate_impl, (2)) +FUNCTION (svcreate3, svcreate_impl, (3)) +FUNCTION (svcreate4, svcreate_impl, (4)) +FUNCTION (svcvt, svcvt_impl,) +FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) +FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdot, svdot_impl,) +FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) +FUNCTION (svdup, svdup_impl,) +FUNCTION (svdup_lane, svdup_lane_impl,) +FUNCTION (svdupq, svdupq_impl,) +FUNCTION (svdupq_lane, svdupq_lane_impl,) +FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) +FUNCTION (sveorv, reduction, (UNSPEC_XORV)) +FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) +FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) +FUNCTION (svextb, svext_bhw_impl, (QImode)) +FUNCTION (svexth, svext_bhw_impl, (HImode)) +FUNCTION (svextw, svext_bhw_impl, (SImode)) +FUNCTION (svget2, svget_impl, (2)) +FUNCTION (svget3, svget_impl, (3)) +FUNCTION (svget4, svget_impl, (4)) +FUNCTION (svindex, svindex_impl,) +FUNCTION (svinsr, svinsr_impl,) +FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) +FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) +FUNCTION (svld1, svld1_impl,) +FUNCTION (svld1_gather, svld1_gather_impl,) +FUNCTION (svld1ro, svld1ro_impl,) +FUNCTION (svld1rq, svld1rq_impl,) +FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) +FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) +FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) +FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) +FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) +FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) +FUNCTION (svld2, svld234_impl, (2)) +FUNCTION (svld3, svld234_impl, (3)) +FUNCTION (svld4, svld234_impl, (4)) +FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) +FUNCTION (svldff1_gather, svldff1_gather_impl,) +FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) +FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) +FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) +FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) +FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) +FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) +FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) +FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) +FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) +FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) +FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1)) +FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) 
+FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) +FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) +FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) +FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) +FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) +FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) +FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) +FUNCTION (svldnt1, svldnt1_impl,) +FUNCTION (svlen, svlen_impl,) +FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) +FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) +FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) +FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) +FUNCTION (svmad, svmad_impl,) +FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) +FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) +FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) +FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) +FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) +FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) +FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) +FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) +FUNCTION (svmla, svmla_impl,) +FUNCTION (svmla_lane, svmla_lane_impl,) +FUNCTION (svmls, svmls_impl,) +FUNCTION (svmls_lane, svmls_lane_impl,) +FUNCTION (svmmla, svmmla_impl,) +FUNCTION (svmov, svmov_impl,) +FUNCTION (svmsb, svmsb_impl,) +FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL)) +FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) +FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, + UNSPEC_UMUL_HIGHPART, -1)) +FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) +FUNCTION (svnand, svnand_impl,) +FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG)) +FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) +FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) +FUNCTION (svnor, svnor_impl,) +FUNCTION (svnot, svnot_impl,) +FUNCTION (svorn, svorn_impl,) +FUNCTION (svorr, rtx_code_function, (IOR, IOR)) +FUNCTION (svorv, reduction, (UNSPEC_IORV)) +FUNCTION (svpfalse, svpfalse_impl,) +FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) +FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) +FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) +FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) +FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) +FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) +FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) +FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) +FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) +FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) +FUNCTION (svptest_any, svptest_impl, (NE)) +FUNCTION (svptest_first, svptest_impl, (LT)) +FUNCTION (svptest_last, svptest_impl, (LTU)) +FUNCTION (svptrue, svptrue_impl,) +FUNCTION (svptrue_pat, svptrue_pat_impl,) +FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1)) +FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) +FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) +FUNCTION 
(svqdecd_pat, svqdec_bhwd_impl, (DImode)) +FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) +FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) +FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) +FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) +FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) +FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) +FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) +FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) +FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1)) +FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) +FUNCTION (svrdffr, svrdffr_impl,) +FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE)) +FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) +FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) +FUNCTION (svreinterpret, svreinterpret_impl,) +FUNCTION (svrev, svrev_impl,) +FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) +FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) +FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) +FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) +FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) +FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) +FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) +FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) +FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) +FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) +FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE)) +FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) +FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) +FUNCTION (svsel, svsel_impl,) +FUNCTION (svset2, svset_impl, (2)) +FUNCTION (svset3, svset_impl, (3)) +FUNCTION (svset4, svset_impl, (4)) +FUNCTION (svsetffr, svsetffr_impl,) +FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) +FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) +FUNCTION (svst1, svst1_impl,) +FUNCTION (svst1_scatter, svst1_scatter_impl,) +FUNCTION (svst1b, svst1_truncate_impl, (QImode)) +FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) +FUNCTION (svst1h, svst1_truncate_impl, (HImode)) +FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) +FUNCTION (svst1w, svst1_truncate_impl, (SImode)) +FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) +FUNCTION (svst2, svst234_impl, (2)) +FUNCTION (svst3, svst234_impl, (3)) +FUNCTION (svst4, svst234_impl, (4)) +FUNCTION (svstnt1, svstnt1_impl,) +FUNCTION (svsub, svsub_impl,) +FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) +FUNCTION (svsudot, svusdot_impl, (true)) +FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) +FUNCTION (svtbl, svtbl_impl,) +FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) +FUNCTION (svtrn1, svtrn_impl, (0)) +FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, + UNSPEC_TRN1Q)) +FUNCTION (svtrn2, svtrn_impl, (1)) 
+FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, + UNSPEC_TRN2Q)) +FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) +FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) +FUNCTION (svundef, svundef_impl, (1)) +FUNCTION (svundef2, svundef_impl, (2)) +FUNCTION (svundef3, svundef_impl, (3)) +FUNCTION (svundef4, svundef_impl, (4)) +FUNCTION (svunpkhi, svunpk_impl, (true)) +FUNCTION (svunpklo, svunpk_impl, (false)) +FUNCTION (svusdot, svusdot_impl, (false)) +FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) +FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) +FUNCTION (svuzp1, svuzp_impl, (0)) +FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, + UNSPEC_UZP1Q)) +FUNCTION (svuzp2, svuzp_impl, (1)) +FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, + UNSPEC_UZP2Q)) +FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) +FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) +FUNCTION (svwrffr, svwrffr_impl,) +FUNCTION (svzip1, svzip_impl, (0)) +FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, + UNSPEC_ZIP1Q)) +FUNCTION (svzip2, svzip_impl, (1)) +FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, + UNSPEC_ZIP2Q)) + +} /* end namespace aarch64_sve */