/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

#pragma GCC push_options
#pragma GCC target ("+nothing+simd")

#include <stdint.h>

#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)

typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
typedef __Uint8x8_t uint8x8_t;
typedef __Uint16x4_t uint16x4_t;
typedef __Uint32x2_t uint32x2_t;
typedef __Float64x1_t float64x1_t;
typedef __Uint64x1_t uint64x1_t;
typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
typedef __Poly64x1_t poly64x1_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
typedef __Uint64x2_t uint64x2_t;

typedef __Poly8_t poly8_t;
typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;

typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;

typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float16x4x2_t
{
  float16x4_t val[2];
} float16x4x2_t;

typedef struct float16x8x2_t
{
  float16x8_t val[2];
} float16x8x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;

typedef struct poly64x1x2_t
{
  poly64x1_t val[2];
} poly64x1x2_t;

typedef struct poly64x1x3_t
{
  poly64x1_t val[3];
} poly64x1x3_t;

typedef struct poly64x1x4_t
{
  poly64x1_t val[4];
} poly64x1x4_t;

typedef struct poly64x2x2_t
{
  poly64x2_t val[2];
} poly64x2x2_t;

typedef struct poly64x2x3_t
{
  poly64x2_t val[3];
} poly64x2x3_t;

typedef struct poly64x2x4_t
{
  poly64x2_t val[4];
} poly64x2x4_t;

typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float16x4x3_t
{
  float16x4_t val[3];
} float16x4x3_t;

typedef struct float16x8x3_t
{
  float16x8_t val[3];
} float16x8x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;

typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float16x4x4_t
{
  float16x4_t val[4];
} float16x4x4_t;

typedef struct float16x8x4_t
{
  float16x8_t val[4];
} float16x8x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;

/* __aarch64_vdup_lane internal macros.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
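
/* For example (illustrative), __aarch64_vdup_lane_s32 (__a, __b) expands to
   vdup_n_s32 (__aarch64_vget_lane_any (__a, __b)); the q-suffixed macros
   below expand to the corresponding vdupq_n_<size> call.  */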

#define __aarch64_vdup_lane_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdup_laneq internal macros.  */
#define __aarch64_vdup_laneq_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdupq_lane internal macros.  */
#define __aarch64_vdupq_lane_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, q, __a, __b)
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* __aarch64_vdupq_laneq internal macros.  */
#define __aarch64_vdupq_laneq_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, q, __a, __b)
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* Internal macro for lane indices.  */

#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
#define __AARCH64_LANE_CHECK(__vec, __idx)	\
	__builtin_aarch64_im_lane_boundsi (sizeof(__vec), sizeof(__vec[0]), __idx)
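
/* __builtin_aarch64_im_lane_boundsi diagnoses an out-of-range lane index at
   compile time, so a bad constant __idx is rejected before any code is
   generated.  */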

/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif
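
/* For example, on big-endian a uint32x4_t has 4 lanes, so architectural
   lane 1 maps to GCC vector index 4 - 1 - 1 = 2; on little-endian the index
   is used unchanged.  */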

/* vget_lane internal macro.  */
#define __aarch64_vget_lane_any(__vec, __index)				\
  __extension__								\
  ({									\
    __AARCH64_LANE_CHECK (__vec, __index);				\
    __vec[__aarch64_lane (__vec, __index)];				\
  })

/* vset_lane and vld1_lane internal macro.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index)			\
  __extension__								\
  ({									\
    __AARCH64_LANE_CHECK (__vec, __index);				\
    __vec[__aarch64_lane (__vec, __index)] = __elem;			\
    __vec;								\
  })
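
/* The statement expression evaluates to the updated copy of __vec, which the
   vset_lane and vld1_lane intrinsics defined later in this file return to
   the caller.  */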

/* vadd  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}

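/* vaddl: widening add.  Each input element is sign- or zero-extended to
   twice its width before the addition, so the per-lane sums cannot wrap.
   Illustrative use (vdup_n_s8 is defined further down in this header):

     int8x8_t __x = vdup_n_s8 (100);
     int16x8_t __y = vaddl_s8 (__x, __x);   => every lane holds 200

   The vaddl_high_* variants below operate on the upper halves of their
   128-bit inputs.  */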
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

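/* vaddw: wide add.  The second (narrow) operand is widened to the element
   width of the first operand before the addition; the vaddw_high_* variants
   use the upper half of a 128-bit narrow operand.  */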
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}

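/* vhadd: halving add.  Each lane computes (a + b) >> 1 with the addition
   done at full precision, so no intermediate overflow occurs; the halved
   result is truncated.  */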
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

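/* vrhadd: rounding halving add.  Like vhadd, but each lane computes
   (a + b + 1) >> 1, i.e. the halved sum is rounded rather than truncated.  */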
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

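/* vaddhn: add and narrow.  Each lane keeps only the most significant half of
   the full-width sum a + b; the vraddhn_* variants below round that half to
   nearest instead of truncating, and the *_high forms pack the narrowed
   result into the upper half of a 128-bit vector above the vector given as
   the first argument.  */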
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

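/* vmul_p8 and vmulq_p8 perform polynomial (carry-less) multiplication of
   each pair of poly8 lanes over GF(2), keeping the low 8 bits of each
   product.  */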
__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}
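
/* Illustrative usage sketch for the vsub / vsubq intrinsics above
   (the values are arbitrary): each lane of the second operand is
   subtracted from the corresponding lane of the first.

     int32x2_t x = {10, 3};
     int32x2_t y = {4, 5};
     int32x2_t d = vsub_s32 (x, y);   lanes of d are {6, -2}  */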

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
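
/* The vsubl, vsubl_high, vsubw and vsubw_high intrinsics above produce
   double-width results: vsubl_s8 widens both 8-bit operands before
   subtracting, the _high forms use the upper lanes of 128-bit operands,
   and vsubw_s8 subtracts a widened narrow vector from an already wide
   one.  Illustrative sketch (arbitrary values):

     int8x8_t  a = {100, 0, 0, 0, 0, 0, 0, 0};
     int8x8_t  b = {-100, 0, 0, 0, 0, 0, 0, 0};
     int16x8_t d = vsubl_s8 (a, b);   lane 0 of d is 200, which does not
                                      fit in the 8-bit element type  */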

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shsubv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}
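
/* The halving-subtract intrinsics above (vhsub / vhsubq) return the
   lane-wise difference shifted right by one, with the intermediate
   difference computed at full precision.  Illustrative sketch
   (arbitrary values):

     uint8x8_t a = {250, 7, 0, 0, 0, 0, 0, 0};
     uint8x8_t b = {0,   2, 0, 0, 0, 0, 0, 0};
     uint8x8_t d = vhsub_u8 (a, b);   lane 0 is 125, lane 1 is 2  */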

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}
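
/* The vsubhn / vrsubhn intrinsics above subtract and then narrow,
   keeping the most significant half of each difference; the vrsubhn
   forms round before narrowing, while the plain forms truncate.  The
   _high variants return a full-width vector whose lower half is the
   first (already narrow) argument.  Illustrative sketch (arbitrary
   values):

     int16x8_t a = {0x1234, 0, 0, 0, 0, 0, 0, 0};
     int16x8_t b = {0x0034, 0, 0, 0, 0, 0, 0, 0};
     int8x8_t  d = vsubhn_s16 (a, b);   lane 0 is 0x12, the high byte
                                        of the difference 0x1200  */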

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
}
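
/* The vqadd / vqsub intrinsics above saturate instead of wrapping:
   each lane result is clamped to the range of the element type.
   Illustrative sketch (arbitrary values):

     uint8x8_t a = {250, 5, 0, 0, 0, 0, 0, 0};
     uint8x8_t b = {10, 10, 0, 0, 0, 0, 0, 0};
     uint8x8_t s = vqadd_u8 (a, b);   lane 0 saturates to 255
     uint8x8_t d = vqsub_u8 (a, b);   lane 1 saturates to 0  */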

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
}
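
/* vqneg / vqabs above saturate on the one input that has no positive
   counterpart: negating or taking the absolute value of the most
   negative element yields the maximum positive value rather than
   wrapping.  Illustrative sketch:

     int8x8_t a = {-128, 5, 0, 0, 0, 0, 0, 0};
     int8x8_t n = vqneg_s8 (a);   lane 0 is 127 (saturated), lane 1 is -5  */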

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}
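
/* vqdmulh / vqrdmulh above return the most significant half of the
   doubled product (a Q15/Q31 fixed-point multiply); the vqrdmulh forms
   round before taking the high half, and the single overflowing case
   (both operands equal to the most negative value) saturates.
   Illustrative sketch (arbitrary values):

     int16x4_t a = {0x4000, -32768, 0, 0};
     int16x4_t b = {0x4000, -32768, 0, 0};
     int16x4_t d = vqdmulh_s16 (a, b);   lane 0 is 0x2000, lane 1
                                         saturates to 32767  */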

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s8 (uint64_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s16 (uint64_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s32 (uint64_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s64 (uint64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f16 (uint64_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f32 (uint64_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u8 (uint64_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u16 (uint64_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u32 (uint64_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f64 (uint64_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p8 (uint64_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p16 (uint64_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p64 (uint64_t __a)
{
  return (poly64x1_t) __a;
}
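
/* The vcreate_* intrinsics above reinterpret the bit pattern of a
   uint64_t as a 64-bit vector.  On a little-endian configuration the
   least significant byte of the scalar becomes lane 0.  Illustrative
   sketch:

     uint8x8_t v = vcreate_u8 (0x0807060504030201ULL);
     lane 0 of v is 0x01 and lane 7 is 0x08  */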

/* vget_lane  */

__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f16 (float16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p64 (poly64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vgetq_lane  */

__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f16 (float16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p64 (poly64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}
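
/* vget_lane / vgetq_lane above extract a single element; the lane
   argument is expected to be a constant expression in range for the
   vector type (0-1 for int32x2_t, 0-3 for int32x4_t, and so on).
   Illustrative sketch:

     int32x4_t a = {10, 20, 30, 40};
     int32_t   x = vgetq_lane_s32 (a, 2);   x is 30  */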

/* vreinterpret  */

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f16 (float16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f64 (float64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s8 (int8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s16 (int16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s32 (int32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s64 (int64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f32 (float32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u8 (uint8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u16 (uint16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u32 (uint32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u64 (uint64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_p16 (poly16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_p64 (poly64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f64 (float64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s8 (int8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s16 (int16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s32 (int32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s64 (int64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f16 (float16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u8 (uint8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u16 (uint16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u32 (uint32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u64 (uint64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p16 (poly16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p64 (poly64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p128 (poly128_t __a)
{
  return (poly8x16_t) __a;
}
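
/* The vreinterpret / vreinterpretq intrinsics in this section are pure
   bit-pattern casts between vector types of the same overall size
   (64 bits for the non-q forms, 128 bits for the q forms); no lane
   values are converted or reordered.  Illustrative sketch:

     uint32x2_t  u = {0x3f800000, 0};
     float32x2_t f = vreinterpret_f32_u32 (u);   lane 0 of f is 1.0f,
                                                 the same bit pattern  */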

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f16 (float16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f64 (float64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s8 (int8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s16 (int16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s32 (int32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s64 (int64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f32 (float32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u8 (uint8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u16 (uint16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u32 (uint32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u64 (uint64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_p8 (poly8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_p64 (poly64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f64 (float64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s8 (int8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s16 (int16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s32 (int32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f16 (float16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u8 (uint8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u16 (uint16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u32 (uint32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u64 (uint64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p8 (poly8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p64 (poly64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p128 (poly128_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f16 (float16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f64 (float64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s8 (int8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s16 (int16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s32 (int32x2_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s64 (int64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f32 (float32x2_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u8 (uint8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u16 (uint16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u32 (uint32x2_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u64 (uint64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_p8 (poly8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_p16 (poly16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f64 (float64x2_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s8 (int8x16_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s16 (int16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s32 (int32x4_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s64 (int64x2_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f16 (float16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f32 (float32x4_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_p128 (poly128_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_u8 (uint8x16_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_u16 (uint16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_p16 (poly16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_u32 (uint32x4_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_u64 (uint64x2_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_p8 (poly8x16_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_p8 (poly8x16_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_p16 (poly16x8_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_f16 (float16x8_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_f32 (float32x4_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_p64 (poly64x2_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_s64 (int64x2_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_u64 (uint64x2_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_s8 (int8x16_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_s16 (int16x8_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_s32 (int32x4_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_u8 (uint8x16_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_u16 (uint16x8_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline poly128_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p128_u32 (uint32x4_t __a)
{
  return (poly128_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_f64 (float64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_s8 (int8x8_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_s16 (int16x4_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_s32 (int32x2_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_s64 (int64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_f32 (float32x2_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_u8 (uint8x8_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_u16 (uint16x4_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_u32 (uint32x2_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_u64 (uint64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_p8 (poly8x8_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_p16 (poly16x4_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f16_p64 (poly64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_f64 (float64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_s8 (int8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_s16 (int16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_s32 (int32x4_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_s64 (int64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_f32 (float32x4_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_u8 (uint8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_u16 (uint16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_u32 (uint32x4_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_u64 (uint64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_p8 (poly8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_p128 (poly128_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_p16 (poly16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f16_p64 (poly64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_f16 (float16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_f64 (float64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_s8 (int8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_s16 (int16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_s32 (int32x2_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_s64 (int64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_u8 (uint8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_u16 (uint16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_u32 (uint32x2_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_u64 (uint64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_p8 (poly8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_p16 (poly16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f32_p64 (poly64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_f16 (float16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_f64 (float64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_s8 (int8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_s16 (int16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_s32 (int32x4_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_s64 (int64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_u8 (uint8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_u16 (uint16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_u32 (uint32x4_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_u64 (uint64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_p8 (poly8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_p16 (poly16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_p64 (poly64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f32_p128 (poly128_t __a)
{
  return (float32x4_t) __a;
}


__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_f16 (float16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_f32 (float32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_p8 (poly8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_p16 (poly16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_p64 (poly64x1_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_s8 (int8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_s16 (int16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_s32 (int32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_s64 (int64x1_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_u8 (uint8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_u16 (uint16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_u32 (uint32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_f64_u64 (uint64x1_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_f16 (float16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_f32 (float32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_p8 (poly8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_p16 (poly16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_p64 (poly64x2_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_s8 (int8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_s16 (int16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_s32 (int32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_s64 (int64x2_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_u8 (uint8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_u16 (uint16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_u32 (uint32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_f64_u64 (uint64x2_t __a)
{
  return (float64x2_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_f16 (float16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_s8 (int8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_s16 (int16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_s32 (int32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_f32 (float32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_u8 (uint8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_u16 (uint16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_u32 (uint32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_u64 (uint64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_p8 (poly8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_p16 (poly16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s64_p64 (poly64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_f64 (float64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_s8 (int8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_s16 (int16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_s32 (int32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_f16 (float16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_p64 (poly64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s64_p128 (poly128_t __a)
{
  return (int64x2_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_f16 (float16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u64_p64 (poly64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_f64 (float64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_f16 (float16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_p64 (poly64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u64_p128 (poly128_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_f16 (float16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_f64 (float64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_s16 (int16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_s32 (int32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_s64 (int64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_f32 (float32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_u8 (uint8x8_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_u16 (uint16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_u32 (uint32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_u64 (uint64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_p8 (poly8x8_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_p16 (poly16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s8_p64 (poly64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f64 (float64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_s16 (int16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_s32 (int32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f16 (float16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u8 (uint8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u16 (uint16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u32 (uint32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u64 (uint64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p8 (poly8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p16 (poly16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p64 (poly64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p128 (poly128_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f16 (float16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f64 (float64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s8 (int8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s32 (int32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s64 (int64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f32 (float32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u8 (uint8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u16 (uint16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u32 (uint32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u64 (uint64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p8 (poly8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p16 (poly16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p64 (poly64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f64 (float64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s8 (int8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s32 (int32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f16 (float16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u8 (uint8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u16 (uint16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u32 (uint32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u64 (uint64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p8 (poly8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p16 (poly16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p64 (poly64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p128 (poly128_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f16 (float16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f64 (float64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s8 (int8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s16 (int16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s64 (int64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f32 (float32x2_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u8 (uint8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u16 (uint16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u32 (uint32x2_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u64 (uint64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_p8 (poly8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_p16 (poly16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_p64 (poly64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_f64 (float64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_s8 (int8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_s16 (int16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_s64 (int64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_f16 (float16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_u8 (uint8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_u16 (uint16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_u32 (uint32x4_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_u64 (uint64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_p8 (poly8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_p16 (poly16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_p64 (poly64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s32_p128 (poly128_t __a)
{
  return (int32x4_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_f16 (float16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_f64 (float64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_s8 (int8x8_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_s16 (int16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_s32 (int32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_s64 (int64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_f32 (float32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_u16 (uint16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_u32 (uint32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_u64 (uint64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_p8 (poly8x8_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_p16 (poly16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u8_p64 (poly64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_f64 (float64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_s8 (int8x16_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_s16 (int16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_s32 (int32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_s64 (int64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_f16 (float16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_u16 (uint16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_u32 (uint32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_u64 (uint64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_p8 (poly8x16_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_p16 (poly16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_p64 (poly64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u8_p128 (poly128_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_f16 (float16x4_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_f64 (float64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_s8 (int8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_s16 (int16x4_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_s32 (int32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_s64 (int64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_f32 (float32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_u8 (uint8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_u32 (uint32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_u64 (uint64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_p8 (poly8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_p16 (poly16x4_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u16_p64 (poly64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_f64 (float64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_s8 (int8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_s16 (int16x8_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_s32 (int32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_s64 (int64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_f16 (float16x8_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_u8 (uint8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_u32 (uint32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_u64 (uint64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_p8 (poly8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_p16 (poly16x8_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_p64 (poly64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u16_p128 (poly128_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_f16 (float16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_f64 (float64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_s8 (int8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_s16 (int16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_s32 (int32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_s64 (int64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_f32 (float32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_u8 (uint8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_u16 (uint16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_u64 (uint64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_p8 (poly8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_p16 (poly16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_u32_p64 (poly64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_f64 (float64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_s8 (int8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_s16 (int16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_s32 (int32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_f16 (float16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_u8 (uint8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_u16 (uint16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_u64 (uint64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_p8 (poly8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_p16 (poly16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_p64 (poly64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_u32_p128 (poly128_t __a)
{
  return (uint32x4_t) __a;
}
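
/* Note: the vreinterpret/vreinterpretq intrinsics above are pure
   bit-pattern casts; the 64-bit or 128-bit register contents are left
   untouched and only the element type is relabelled.  As an illustrative
   sketch (user code, not part of this header; the helper name is
   hypothetical), the sign bit of every lane of a float32x4_t can be
   cleared by viewing the same bits as four uint32_t lanes:

     static inline float32x4_t
     my_absq_f32 (float32x4_t __x)
     {
       uint32x4_t __bits = vreinterpretq_u32_f32 (__x);
       __bits = vandq_u32 (__bits, vdupq_n_u32 (0x7fffffffu));
       return vreinterpretq_f32_u32 (__bits);
     }  */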

/* vset_lane  */

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_p64 (poly64_t __elem, poly64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

/* vsetq_lane  */

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_p64 (poly64_t __elem, poly64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}
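
/* Usage sketch for the vset_lane/vsetq_lane intrinsics above
   (illustrative; the lane index is expected to be an integer constant
   expression in the range [0, lanes - 1], and an out-of-range constant
   is rejected at compile time).  Starting from a zeroed vector,

     int16x8_t __v = vdupq_n_s16 (0);
     __v = vsetq_lane_s16 (42, __v, 3);

   leaves lane 3 holding 42 and every other lane holding 0.  */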

#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);
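
/* For reference, __GET_LOW (f32) expands to:

     uint64x2_t tmp = vreinterpretq_u64_f32 (__a);
     uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));
     return vreinterpret_f32_u64 (lo);

   i.e. the low 64 bits of the 128-bit input are extracted through a
   uint64_t view and then relabelled to the requested element type.
   __GET_HIGH below follows the same pattern with lane 1.  */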

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_f16 (float16x8_t __a)
{
  __GET_LOW (f16);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_f32 (float32x4_t __a)
{
  __GET_LOW (f32);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_f64 (float64x2_t __a)
{
  return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_p8 (poly8x16_t __a)
{
  __GET_LOW (p8);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_p16 (poly16x8_t __a)
{
  __GET_LOW (p16);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_p64 (poly64x2_t __a)
{
  __GET_LOW (p64);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_s8 (int8x16_t __a)
{
  __GET_LOW (s8);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_s16 (int16x8_t __a)
{
  __GET_LOW (s16);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_s32 (int32x4_t __a)
{
  __GET_LOW (s32);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_s64 (int64x2_t __a)
{
  __GET_LOW (s64);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_u8 (uint8x16_t __a)
{
  __GET_LOW (u8);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_u16 (uint16x8_t __a)
{
  __GET_LOW (u16);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_u32 (uint32x4_t __a)
{
  __GET_LOW (u32);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_low_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 0));
}

#undef __GET_LOW

#define __GET_HIGH(__TYPE)					\
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);		\
  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));	\
  return vreinterpret_##__TYPE##_u64 (hi);

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_f16 (float16x8_t __a)
{
  __GET_HIGH (f16);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_f32 (float32x4_t __a)
{
  __GET_HIGH (f32);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_f64 (float64x2_t __a)
{
  __GET_HIGH (f64);
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_p8 (poly8x16_t __a)
{
  __GET_HIGH (p8);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_p16 (poly16x8_t __a)
{
  __GET_HIGH (p16);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_p64 (poly64x2_t __a)
{
  __GET_HIGH (p64);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_s8 (int8x16_t __a)
{
  __GET_HIGH (s8);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_s16 (int16x8_t __a)
{
  __GET_HIGH (s16);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_s32 (int32x4_t __a)
{
  __GET_HIGH (s32);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_s64 (int64x2_t __a)
{
  __GET_HIGH (s64);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_u8 (uint8x16_t __a)
{
  __GET_HIGH (u8);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_u16 (uint16x8_t __a)
{
  __GET_HIGH (u16);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_u32 (uint32x4_t __a)
{
  __GET_HIGH (u32);
}

#undef __GET_HIGH

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_high_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 1));
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_combinedi (__a[0], __b[0]);
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_f16 (float16x4_t __a, float16x4_t __b)
{
  return __builtin_aarch64_combinev4hf (__a, __b);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
  return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
						     (int32x2_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_f64 (float64x1_t __a, float64x1_t __b)
{
  return __builtin_aarch64_combinedf (__a[0], __b[0]);
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
{
  return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
{
  return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
}

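/* vget_low_<t>, vget_high_<t> and vcombine_<t> are complementary:
   splitting a 128-bit vector into halves and recombining them is an
   identity.  A purely illustrative sketch (not part of this header):

     int8x16_t __w = vcombine_s8 (vget_low_s8 (__v), vget_high_s8 (__v));
     // __w now holds the same sixteen lanes as __v.  */
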
/* Start of temporary inline asm implementations.  */
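/* These asm implementations share a common pattern: the result goes in a
   SIMD register ("=w"), accumulator inputs are tied to the output with the
   "0" matching constraint, and ordinary vector inputs use "w".  Operands
   feeding a by-element form of a 16-bit instruction use "x" instead,
   because those encodings can only index registers V0-V15.  The asm bodies
   are expected to be superseded by builtin-based implementations, hence
   "temporary" above.  */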

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("saba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("saba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("saba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("uaba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("uaba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("uaba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("sabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("sabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("sabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("uabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("uabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("uabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("saba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("saba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("saba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("uaba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("uaba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("uaba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdl_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_s8 (int8x8_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_s16 (int16x4_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_u8 (uint8x8_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_u16 (uint16x4_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_s8 (int8x16_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_s16 (int16x8_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_s32 (int32x4_t a)
{
  int64_t result;
  __asm__ ("saddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_u8 (uint8x16_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_u16 (uint16x8_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlvq_u32 (uint32x4_t a)
{
  uint64_t result;
  __asm__ ("uaddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

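/* FCVTXN narrows double precision to single precision using round-to-odd,
   which avoids double-rounding errors if the result is rounded again
   afterwards (e.g. down to half precision).  */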
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtx_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtxn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
{
  float32x4_t result;
  __asm__ ("fcvtxn2 %0.4s,%1.2d"
           : "=w"(result)
           : "w" (b), "0"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtxd_f32_f64 (float64_t a)
{
  float32_t result;
  __asm__ ("fcvtxn %s0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

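/* The floating-point vmla_n/vmls_n variants below use a separate multiply
   and add/subtract through a scratch register rather than a fused
   multiply-accumulate, preserving the unfused (two-rounding) behaviour of
   these operations.  */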
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

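/* The *_lane and *_laneq variants below are macros built from GNU
   statement expressions: the lane index reaches the asm through the "i"
   constraint and so must be an integer constant expression, which the
   macro form keeps visible to the compiler.  */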
#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mls %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mls %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mls %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

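/* vmlsl_high_n: widening multiply-subtract of the high half of b by the
   scalar c (SMLSL2/UMLSL2 by element 0).  */
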
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

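/* vmlsl_high: widening multiply-subtract using the high halves of b and c
   (SMLSL2/UMLSL2).  */
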
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

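/* vmlsl_lane / vmlsl_laneq: widening multiply-subtract by a selected lane
   of c (SMLSL/UMLSL by element).  */
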
#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

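/* vmlsl_n: widening multiply-subtract by the scalar c (SMLSL/UMLSL by
   element 0).  */
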
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

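/* vmlsl: widening multiply-subtract, d[i] = a[i] - b[i] * c[i] at double
   element width (SMLSL/UMLSL).  */
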
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

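/* vmlsq_n: quad-register multiply-subtract by a scalar; the float variant is
   expanded as FMUL followed by FSUB, the integer variants use MLS by
   element 0.  */
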
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

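/* vmlsq: quad-register multiply-subtract, d[i] = a[i] - b[i] * c[i] (MLS).  */
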
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

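/* vmovl_high: widen the high half of a to double element width
   (SSHLL2/USHLL2 with shift #0).  */
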
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("sshll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("sshll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("sshll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("ushll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("ushll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("ushll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

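/* vmovl: sign- or zero-extend each element to double width (SSHLL/USHLL
   with shift #0).  */
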
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_s8 (int8x8_t a)
{
  int16x8_t result;
  __asm__ ("sshll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_s16 (int16x4_t a)
{
  int32x4_t result;
  __asm__ ("sshll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_s32 (int32x2_t a)
{
  int64x2_t result;
  __asm__ ("sshll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_u8 (uint8x8_t a)
{
  uint16x8_t result;
  __asm__ ("ushll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_u16 (uint16x4_t a)
{
  uint32x4_t result;
  __asm__ ("ushll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_u32 (uint32x2_t a)
{
  uint64x2_t result;
  __asm__ ("ushll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

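/* vmovn_high: narrow b to half element width and place it in the high half
   of the result, keeping a in the low half (XTN2).  */
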
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

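/* vmovn: narrow each element to half width, truncating the upper bits
   (XTN).  */
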
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

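/* vmull_high_lane / vmull_high_laneq: widening multiply of the high half of a
   by a selected lane of b (SMULL2/UMULL2 by element).  */
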
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

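/* vmull_high_n: widening multiply of the high half of a by the scalar b
   (SMULL2/UMULL2 by element 0).  */
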
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_n_s16 (int16x8_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_n_s32 (int32x4_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_n_u16 (uint16x8_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_n_u32 (uint32x4_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

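/* vmull_high: widening multiply of the high halves of a and b
   (PMULL2/SMULL2/UMULL2).  */
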
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_p8 (poly8x16_t a, poly8x16_t b)
{
  poly16x8_t result;
  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

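/* vmull_lane / vmull_laneq: widening multiply by a selected lane of b
   (SMULL/UMULL by element).  */
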
#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

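/* vmull_n: widening multiply by the scalar b (SMULL/UMULL by element 0).  */
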
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_n_s16 (int16x4_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_n_s32 (int32x2_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_n_u16 (uint16x4_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_n_u32 (uint32x2_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

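/* vmull: widening multiply, d[i] = a[i] * b[i] at double element width; the
   poly8 variant is a carry-less (polynomial) multiply (PMULL/SMULL/UMULL).  */
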
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_p8 (poly8x8_t a, poly8x8_t b)
{
  poly16x8_t result;
  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("smull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("umull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmull_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

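/* vpadal: add adjacent pairs of b at double element width and accumulate the
   sums into a (SADALP/UADALP).  */
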
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

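/* vpadalq: quad-register pairwise add and accumulate long (SADALP/UADALP).  */
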
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

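/* vpaddl: add adjacent pairs of elements, widening the sums to double
   element width (SADDLP/UADDLP).  */
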
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
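
/* Pairwise add: vpaddq_* read adjacent pairs of elements from the
   concatenation of A and B and add each pair (ADDP).  */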

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
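
/* Saturating doubling multiply returning high half, by scalar (SQDMULH
   by element).  The "x" constraint on the 16-bit scalar operand
   restricts it to V0-V15, which is all the by-element encoding of the
   16-bit multiplies can name.  */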

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
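
/* Saturating narrow into the high half: vqmovn_high_* and vqmovun_high_*
   narrow B with saturation and write the result to the upper half of the
   return value, keeping A in the lower half (SQXTN2/UQXTN2/SQXTUN2).
   The vcombine places A in the lower half and zero in the upper half;
   the asm then overwrites the upper half in place.  */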

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
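
/* Saturating rounding doubling multiply returning high half, by scalar
   (SQRDMULH by element); as above, the 16-bit scalar uses the "x"
   constraint.  */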

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
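
/* Narrowing shifts into the high half.  These are macros rather than
   inline functions because the shift count C must be a compile-time
   constant for the "i" constraint.  Each macro shifts B_ right by C,
   narrows the result (saturating, rounding and/or unsigned according to
   the instruction used) and writes it to the upper half of the result,
   keeping A_ in the lower half.  For example, vqrshrn_high_n_s16 (a, b, 4)
   returns a vector whose low eight lanes are A and whose high eight
   lanes are the lanes of B rounded, shifted right by four and saturated
   to the int8_t range.  */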

#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
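
/* Rounding shift right narrow (RSHRN); macros again so that the shift
   count is a compile-time constant.  */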

#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
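
/* Unsigned reciprocal square root estimate (URSQRTE).  */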

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrte_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("ursqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrteq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("ursqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
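
/* Plain shift right narrow, into the high half of a quadword result
   (SHRN2) and into a doubleword result (SHRN).  */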

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
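
/* Shift left and insert (SLI) and shift right and insert (SRI) on the
   polynomial types; the shift amount C must be a compile-time constant,
   hence the macros.  */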

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p64(a, b, c)						\
  __extension__								\
    ({									\
       poly64x1_t b_ = (b);						\
       poly64x1_t a_ = (a);						\
       poly64x1_t result;						\
       __asm__ ("sri %d0,%d2,%3"					\
		: "=w"(result)						\
		: "0"(a_), "w"(b_), "i"(c)				\
		: /* No clobbers.  */);					\
       result;								\
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p64(a, b, c)						\
  __extension__								\
    ({									\
       poly64x2_t b_ = (b);						\
       poly64x2_t a_ = (a);						\
       poly64x2_t result;						\
       __asm__ ("sri %0.2d,%2.2d,%3"					\
		: "=w"(result)						\
		: "0"(a_), "w"(b_), "i"(c)				\
		: /* No clobbers.  */);					\
       result;								\
     })
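
/* Test bits: each lane of the result is all ones if the bitwise AND of
   the corresponding lanes of A and B is non-zero, and all zeros
   otherwise.  These are plain C rather than inline asm.  */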

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
{
  return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b))
		       != 0);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtst_p16 (poly16x4_t a, poly16x4_t b)
{
  return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b))
		       != 0);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtst_p64 (poly64x1_t a, poly64x1_t b)
{
  return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtstq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b))
		       != 0);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtstq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b))
		       != 0);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtstq_p64 (poly64x2_t a, poly64x2_t b)
{
  return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b))
		       != __AARCH64_INT64_C (0));
}

/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | Y  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | N  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

  This is required for casting memory references.  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;
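
/* For example, __STRUCTN (int, 8, 2) expands to

     typedef struct int8x2_t {
       int8_t val[2];
     } int8x2_t;

   i.e. a struct whose layout matches two consecutive int8_t elements
   in memory.  */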

/* 2-element structs.  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (float, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs.  */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 16, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs.  */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
#undef __STRUCTN


#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ extern __inline void					     \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst2_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_oi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregoi##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregoi##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}
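
/* The expansions below store one lane of a pair of 64-bit vectors: each
   input vector is widened to a Q register by concatenating a zero upper
   half, the two Q registers are packed into the builtin's opaque OI
   tuple, and the selected lane is written out with st2_lane.  */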

__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64,
		 poly64x2_t)
__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST2_LANE_FUNC
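/* For the vst2q_lane_* forms the input already occupies two full Q
   registers, so it is simply reinterpreted as the builtin's OI tuple
   through a union.  */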
#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ extern __inline void					    \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst2q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_oi __o; } __temp = { __b };		    \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
__ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
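
/* Likewise for vst3_lane_* / vst3q_lane_*, using the CI
   (three-Q-register) tuple.  */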

#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ extern __inline void					     \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst3_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_ci __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[2], 2); \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}

__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64,
		 poly64x2_t)
__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST3_LANE_FUNC
#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ extern __inline void					    \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst3q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_ci __o; } __temp = { __b };		    \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
__ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
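
/* Likewise for vst4_lane_* / vst4q_lane_*, using the XI
   (four-Q-register) tuple.  */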

#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ extern __inline void					     \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst4_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_xi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[3]								     \
    = vcombine_##funcsuffix (__b.val[3],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[2], 2); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[3], 3); \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}

__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64,
		 poly64x2_t)
__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST4_LANE_FUNC
#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ extern __inline void					    \
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
vst4q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_xi __o; } __temp = { __b };		    \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
__ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return  __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
}

/* Table intrinsics.  */
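/* TBL looks up each byte of the index vector in the table registers,
   producing zero for out-of-range indexes; TBX instead leaves the
   corresponding byte of the destination unchanged.  */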

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* V7 legacy table intrinsics.  */

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
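
/* vtbl3 and vtbl4 pack their 8-byte table registers into two 16-byte
   registers (vtbl3 zero-pads the unused half) and index them with the
   two-register __builtin_aarch64_tbl3v8qi builtin.  */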

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = __builtin_aarch64_tbl3v8qi (__o, idx);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (uint8x8_t) __builtin_aarch64_tbl3v8qi (__o, (int8x8_t) idx);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (poly8x8_t) __builtin_aarch64_tbl3v8qi (__o, (int8x8_t) idx);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = __builtin_aarch64_tbl3v8qi (__o, idx);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (uint8x8_t) __builtin_aarch64_tbl3v8qi (__o, (int8x8_t) idx);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (poly8x8_t) __builtin_aarch64_tbl3v8qi (__o, (int8x8_t) idx);
  return result;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */

/* vabd.  */

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabds_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_fabdsf (__a, __b);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_fabddf (__a, __b);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_fabdv2sf (__a, __b);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabd_f64 (float64x1_t __a, float64x1_t __b)
{
  return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0),
				   vget_lane_f64 (__b, 0))};
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_fabdv4sf (__a, __b);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_fabdv2df (__a, __b);
}

/* vabs  */

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_fabs (__a[0])};
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}

/* Try to avoid moving between integer and vector registers.
   For an explanation of why the cast to unsigned is needed, see the
   vnegd_s64 intrinsic.  The related testcase is
   gcc.target/aarch64/vabsd_s64.c.  */

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsd_s64 (int64_t __a)
{
  return __a < 0 ? - (uint64_t) __a : __a;
}
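
/* For example, when __a is INT64_MIN the negation is performed in
   uint64_t arithmetic, so - (uint64_t) __a is 0x8000000000000000 with
   no signed overflow, and converting the result back to int64_t yields
   INT64_MIN again, matching the behaviour of the ABS instruction.  */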

/* vadd */

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  return __a + __b;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __a + __b;
}

/* vaddv */

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2si (__a);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_u8 (uint8x8_t __a)
{
  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_u16 (uint16x4_t __a)
{
  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_u32 (uint32x2_t __a)
{
  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_u8 (uint8x16_t __a)
{
  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_u16 (uint16x8_t __a)
{
  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_u32 (uint32x4_t __a)
{
  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_u64 (uint64x2_t __a)
{
  return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
}

/* vbsl  */
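/* Bitwise select: each bit of the result is taken from __b where the
   corresponding bit of the mask __a is set, and from __c where it is
   clear.  */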

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return (float64x1_t)
    { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
}
__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
{
  return (poly64x1_t)
      {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])};
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
{
  return (int64x1_t)
      {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
{
  return (uint64x1_t)
      {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}

/* ARMv8.1-A intrinsics.  */
#pragma GCC push_options
#pragma GCC target ("+nothing+rdma")
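
/* vqrdmlah/vqrdmlsh map to the SQRDMLAH/SQRDMLSH instructions: signed
   saturating rounding doubling multiply accumulate (or subtract)
   returning the high half.  */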

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return  __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return  __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return  __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
{
  return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
{
  return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return  __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
{
  return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
{
  return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
}
#pragma GCC pop_options

#pragma GCC push_options
#pragma GCC target ("+nothing+crypto")
/* vaes  */
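/* Single AES rounds and (inverse) mix-columns, mapping to the AESE,
   AESD, AESMC and AESIMC instructions.  */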

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#pragma GCC pop_options

/* vcage  */
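/* Absolute compares (vcage/vcagt/vcale/vcalt): compare |__a| with
   |__b|, returning all-ones in each lane when the comparison holds and
   zero otherwise.  */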

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcage_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) >= vabs_f64 (__b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcages_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcage_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) >= vabs_f32 (__b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcageq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) >= vabsq_f32 (__b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaged_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcageq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) >= vabsq_f64 (__b);
}

/* vcagt  */

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) > vabs_f32 (__b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagt_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) > vabs_f64 (__b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) > vabsq_f32 (__b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagtd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) > vabsq_f64 (__b);
}

/* vcale  */

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcale_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) <= vabs_f32 (__b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcale_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) <= vabs_f64 (__b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaled_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcales_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) <= vabsq_f32 (__b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) <= vabsq_f64 (__b);
}

/* vcalt  */

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcalt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) < vabs_f32 (__b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcalt_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) < vabs_f64 (__b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaltd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) < vabsq_f32 (__b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) < vabsq_f64 (__b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcalts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
}

/* vceq - vector.  */
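/* Lanewise equality compare.  True lanes are set to all-ones and false
   lanes to zero, so the result can be fed straight into a bitwise select,
   e.g. (illustrative):

     uint32x2_t m = vceq_u32 (x, y);
     uint32x2_t r = vbsl_u32 (m, a, b);    per lane: x == y ? a : b  */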

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a == __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a == __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) (__a == __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_p64 (poly64x1_t __a, poly64x1_t __b)
{
  return (uint64x1_t) (__a == __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a == __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a == __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a == __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a == __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a == __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a == __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) (__a == __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a == __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a == __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a == __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a == __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a == __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a == __b);
}

/* vceq - scalar.  */
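/* Scalar equality compare: returns 0 on inequality and an all-ones
   uint32_t/uint64_t on equality.  The remaining scalar comparison
   intrinsics below follow the same 0/all-ones convention.  */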

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqs_f32 (float32_t __a, float32_t __b)
{
  return __a == __b ? -1 : 0;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqd_s64 (int64_t __a, int64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqd_u64 (uint64_t __a, uint64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqd_f64 (float64_t __a, float64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

/* vceqz - vector.  */
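/* Lanewise compare against zero: all-ones in lanes that equal zero.  */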

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a == 0.0f);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a == (float64x1_t) {0.0});
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_p8 (poly8x8_t __a)
{
  return (uint8x8_t) (__a == 0);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a == 0);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a == 0);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a == 0);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_u8 (uint8x8_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_u16 (uint16x4_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_u32 (uint32x2_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqz_u64 (uint64x1_t __a)
{
  return (__a == __AARCH64_UINT64_C (0));
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a == 0.0f);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a == 0.0);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_p8 (poly8x16_t __a)
{
  return (uint8x16_t) (__a == 0);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a == 0);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a == 0);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a == 0);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_u8 (uint8x16_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_u16 (uint16x8_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_u32 (uint32x4_t __a)
{
  return (__a == 0);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_u64 (uint64x2_t __a)
{
  return (__a == __AARCH64_UINT64_C (0));
}

/* vceqz - scalar.  */

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzs_f32 (float32_t __a)
{
  return __a == 0.0f ? -1 : 0;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzd_s64 (int64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzd_u64 (uint64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzd_f64 (float64_t __a)
{
  return __a == 0.0 ? -1ll : 0ll;
}

/* vcge - vector.  */
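/* Lanewise greater-than-or-equal compare; signed, unsigned and
   floating-point element types each use their natural ordering.  */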

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a >= __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a >= __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a >= __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a >= __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a >= __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a >= __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a >= __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a >= __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a >= __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a >= __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a >= __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a >= __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a >= __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a >= __b);
}

/* vcge - scalar.  */

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcges_f32 (float32_t __a, float32_t __b)
{
  return __a >= __b ? -1 : 0;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcged_s64 (int64_t __a, int64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcged_f64 (float64_t __a, float64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

/* vcgez - vector.  */
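/* Lanewise compare greater-than-or-equal to zero.  Only signed and
   floating-point forms exist, as the comparison is trivially true for
   unsigned elements.  */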

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a >= 0.0f);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a >= (float64x1_t) {0.0});
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a >= 0);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a >= 0);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a >= 0);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgez_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a >= 0.0f);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a >= 0.0);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a >= 0);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a >= 0);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a >= 0);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
}

/* vcgez - scalar.  */

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezs_f32 (float32_t __a)
{
  return __a >= 0.0f ? -1 : 0;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezd_s64 (int64_t __a)
{
  return __a >= 0 ? -1ll : 0ll;
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezd_f64 (float64_t __a)
{
  return __a >= 0.0 ? -1ll : 0ll;
}

/* vcgt - vector.  */
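/* Lanewise greater-than compare, producing the usual all-ones/zero mask
   per lane.  */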

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a > __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a > __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a > __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a > __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a > __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a > __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a > __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a > __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a > __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a > __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a > __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a > __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a > __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a > __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a > __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a > __b);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a > __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a > __b);
}