Mercurial > hg > CbC > CbC_gcc
view gcc/config/aarch64/arm_neon.h @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line source
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

/* Save the current option state and compile the declarations that
   follow for the target string "+nothing+simd".
   NOTE(review): presumably undone by a matching pop_options beyond
   this excerpt -- confirm.  */
#pragma GCC push_options
#pragma GCC target ("+nothing+simd")

#include <stdint.h>

/* Cast __C to a 64-bit integer type.  __C is substituted without
   parentheses, so the cast binds to the first operand of __C.  */
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)

/* Public names for the compiler-provided vector types.  */
typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
typedef __Uint8x8_t uint8x8_t;
typedef __Uint16x4_t uint16x4_t;
typedef __Uint32x2_t uint32x2_t;
typedef __Float64x1_t float64x1_t;
typedef __Uint64x1_t uint64x1_t;
typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
typedef __Poly64x1_t poly64x1_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
typedef __Uint64x2_t uint64x2_t;

/* Public names for the compiler-provided polynomial scalar types.  */
typedef __Poly8_t poly8_t;
typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;

/* Floating-point scalar element types.  */
typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;

/* Aggregates holding two vectors of the same type in `val'.  The
   poly64 x3 and x4 aggregates are declared in this group as well.  */
typedef struct int8x8x2_t { int8x8_t val[2]; } int8x8x2_t;
typedef struct int8x16x2_t { int8x16_t val[2]; } int8x16x2_t;
typedef struct int16x4x2_t { int16x4_t val[2]; } int16x4x2_t;
typedef struct int16x8x2_t { int16x8_t val[2]; } int16x8x2_t;
typedef struct int32x2x2_t { int32x2_t val[2]; } int32x2x2_t;
typedef struct int32x4x2_t { int32x4_t val[2]; } int32x4x2_t;
typedef struct int64x1x2_t { int64x1_t val[2]; } int64x1x2_t;
typedef struct int64x2x2_t { int64x2_t val[2]; } int64x2x2_t;
typedef struct uint8x8x2_t { uint8x8_t val[2]; } uint8x8x2_t;
typedef struct uint8x16x2_t { uint8x16_t val[2]; } uint8x16x2_t;
typedef struct uint16x4x2_t { uint16x4_t val[2]; } uint16x4x2_t;
typedef struct uint16x8x2_t { uint16x8_t val[2]; } uint16x8x2_t;
typedef struct uint32x2x2_t { uint32x2_t val[2]; } uint32x2x2_t;
typedef struct uint32x4x2_t { uint32x4_t val[2]; } uint32x4x2_t;
typedef struct uint64x1x2_t { uint64x1_t val[2]; } uint64x1x2_t;
typedef struct uint64x2x2_t { uint64x2_t val[2]; } uint64x2x2_t;
typedef struct float16x4x2_t { float16x4_t val[2]; } float16x4x2_t;
typedef struct float16x8x2_t { float16x8_t val[2]; } float16x8x2_t;
typedef struct float32x2x2_t { float32x2_t val[2]; } float32x2x2_t;
typedef struct float32x4x2_t { float32x4_t val[2]; } float32x4x2_t;
typedef struct float64x2x2_t { float64x2_t val[2]; } float64x2x2_t;
typedef struct float64x1x2_t { float64x1_t val[2]; } float64x1x2_t;
typedef struct poly8x8x2_t { poly8x8_t val[2]; } poly8x8x2_t;
typedef struct poly8x16x2_t { poly8x16_t val[2]; } poly8x16x2_t;
typedef struct poly16x4x2_t { poly16x4_t val[2]; } poly16x4x2_t;
typedef struct poly16x8x2_t { poly16x8_t val[2]; } poly16x8x2_t;
typedef struct poly64x1x2_t { poly64x1_t val[2]; } poly64x1x2_t;
typedef struct poly64x1x3_t { poly64x1_t val[3]; } poly64x1x3_t;
typedef struct poly64x1x4_t { poly64x1_t val[4]; } poly64x1x4_t;
typedef struct poly64x2x2_t { poly64x2_t val[2]; } poly64x2x2_t;
typedef struct poly64x2x3_t { poly64x2_t val[3]; } poly64x2x3_t;
typedef struct poly64x2x4_t { poly64x2_t val[4]; } poly64x2x4_t;

/* Aggregates holding three vectors of the same type in `val'.  */
typedef struct int8x8x3_t { int8x8_t val[3]; } int8x8x3_t;
typedef struct int8x16x3_t { int8x16_t val[3]; } int8x16x3_t;
typedef struct int16x4x3_t { int16x4_t val[3]; } int16x4x3_t;
typedef struct int16x8x3_t { int16x8_t val[3]; } int16x8x3_t;
typedef struct int32x2x3_t { int32x2_t val[3]; } int32x2x3_t;
typedef struct int32x4x3_t { int32x4_t val[3]; } int32x4x3_t;
typedef struct int64x1x3_t { int64x1_t val[3]; } int64x1x3_t;
typedef struct int64x2x3_t { int64x2_t val[3]; } int64x2x3_t;
typedef struct uint8x8x3_t { uint8x8_t val[3]; } uint8x8x3_t;
typedef struct uint8x16x3_t { uint8x16_t val[3]; } uint8x16x3_t;
typedef struct uint16x4x3_t { uint16x4_t val[3]; } uint16x4x3_t;
typedef struct uint16x8x3_t { uint16x8_t val[3]; } uint16x8x3_t;
typedef struct uint32x2x3_t { uint32x2_t val[3]; } uint32x2x3_t;
typedef struct uint32x4x3_t { uint32x4_t val[3]; } uint32x4x3_t;
typedef struct uint64x1x3_t { uint64x1_t val[3]; } uint64x1x3_t;
typedef struct uint64x2x3_t { uint64x2_t val[3]; } uint64x2x3_t;
typedef struct float16x4x3_t { float16x4_t val[3]; } float16x4x3_t;
typedef struct float16x8x3_t { float16x8_t val[3]; } float16x8x3_t;
typedef struct float32x2x3_t { float32x2_t val[3]; } float32x2x3_t;
typedef struct float32x4x3_t { float32x4_t val[3]; } float32x4x3_t;
typedef struct float64x2x3_t { float64x2_t val[3]; } float64x2x3_t;
typedef struct float64x1x3_t { float64x1_t val[3]; } float64x1x3_t;
typedef struct poly8x8x3_t { poly8x8_t val[3]; } poly8x8x3_t;
typedef struct poly8x16x3_t { poly8x16_t val[3]; } poly8x16x3_t;
typedef struct poly16x4x3_t { poly16x4_t val[3]; } poly16x4x3_t;
typedef struct poly16x8x3_t { poly16x8_t val[3]; } poly16x8x3_t;

/* Aggregates holding four vectors of the same type in `val'.  */
typedef struct int8x8x4_t { int8x8_t val[4]; } int8x8x4_t;
typedef struct int8x16x4_t { int8x16_t val[4]; } int8x16x4_t;
typedef struct int16x4x4_t { int16x4_t val[4]; } int16x4x4_t;
typedef struct int16x8x4_t { int16x8_t val[4]; } int16x8x4_t;
typedef struct int32x2x4_t { int32x2_t val[4]; } int32x2x4_t;
typedef struct int32x4x4_t { int32x4_t val[4]; } int32x4x4_t;
typedef struct int64x1x4_t { int64x1_t val[4]; } int64x1x4_t;
typedef struct int64x2x4_t { int64x2_t val[4]; } int64x2x4_t;
typedef struct uint8x8x4_t { uint8x8_t val[4]; } uint8x8x4_t;
typedef struct uint8x16x4_t { uint8x16_t val[4]; } uint8x16x4_t;
typedef struct uint16x4x4_t { uint16x4_t val[4]; } uint16x4x4_t;
typedef struct uint16x8x4_t { uint16x8_t val[4]; } uint16x8x4_t;
typedef struct uint32x2x4_t { uint32x2_t val[4]; } uint32x2x4_t;
typedef struct uint32x4x4_t { uint32x4_t val[4]; } uint32x4x4_t;
typedef struct uint64x1x4_t { uint64x1_t val[4]; } uint64x1x4_t;
typedef struct uint64x2x4_t { uint64x2_t val[4]; } uint64x2x4_t;
typedef struct float16x4x4_t { float16x4_t val[4]; } float16x4x4_t;
typedef struct float16x8x4_t { float16x8_t val[4]; } float16x8x4_t;
typedef struct float32x2x4_t { float32x2_t val[4]; } float32x2x4_t;
typedef struct float32x4x4_t { float32x4_t val[4]; } float32x4x4_t;
typedef struct float64x2x4_t { float64x2_t val[4]; } float64x2x4_t;
typedef struct float64x1x4_t { float64x1_t val[4]; } float64x1x4_t;
typedef struct poly8x8x4_t { poly8x8_t val[4]; } poly8x8x4_t;
typedef struct poly8x16x4_t { poly8x16_t val[4]; } poly8x16x4_t;
typedef struct poly16x4x4_t { poly16x4_t val[4]; } poly16x4x4_t;
typedef struct poly16x8x4_t { poly16x8_t val[4]; } poly16x8x4_t;

/* __aarch64_vdup_lane internal macros.  */

/* Paste together the name vdup<__q>_n_<__size> and apply it to the
   element that __aarch64_vget_lane_any reads from lane __b of __a.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))

/* Per-type forms; the empty second argument selects vdup_n_<type>.  */
#define __aarch64_vdup_lane_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdup_laneq internal macros.  These expand exactly like
   the __aarch64_vdup_lane_<type> forms above.  */
#define __aarch64_vdup_laneq_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdupq_lane internal macros.
*/

/* Each __aarch64_vdupq_lane_<type> (__a, __b) forwards to
   __aarch64_vdup_lane_any with <type> as the size suffix and `q' as
   the infix, so the pasted callee name is vdupq_n_<type>.  */
#define __aarch64_vdupq_lane_f16(__a, __b) \
   __aarch64_vdup_lane_any (f16, q, __a, __b)
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_p64(__a, __b) \
   __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* __aarch64_vdupq_laneq internal macros.
*/ #define __aarch64_vdupq_laneq_f16(__a, __b) \ __aarch64_vdup_lane_any (f16, q, __a, __b) #define __aarch64_vdupq_laneq_f32(__a, __b) \ __aarch64_vdup_lane_any (f32, q, __a, __b) #define __aarch64_vdupq_laneq_f64(__a, __b) \ __aarch64_vdup_lane_any (f64, q, __a, __b) #define __aarch64_vdupq_laneq_p8(__a, __b) \ __aarch64_vdup_lane_any (p8, q, __a, __b) #define __aarch64_vdupq_laneq_p16(__a, __b) \ __aarch64_vdup_lane_any (p16, q, __a, __b) #define __aarch64_vdupq_laneq_p64(__a, __b) \ __aarch64_vdup_lane_any (p64, q, __a, __b) #define __aarch64_vdupq_laneq_s8(__a, __b) \ __aarch64_vdup_lane_any (s8, q, __a, __b) #define __aarch64_vdupq_laneq_s16(__a, __b) \ __aarch64_vdup_lane_any (s16, q, __a, __b) #define __aarch64_vdupq_laneq_s32(__a, __b) \ __aarch64_vdup_lane_any (s32, q, __a, __b) #define __aarch64_vdupq_laneq_s64(__a, __b) \ __aarch64_vdup_lane_any (s64, q, __a, __b) #define __aarch64_vdupq_laneq_u8(__a, __b) \ __aarch64_vdup_lane_any (u8, q, __a, __b) #define __aarch64_vdupq_laneq_u16(__a, __b) \ __aarch64_vdup_lane_any (u16, q, __a, __b) #define __aarch64_vdupq_laneq_u32(__a, __b) \ __aarch64_vdup_lane_any (u32, q, __a, __b) #define __aarch64_vdupq_laneq_u64(__a, __b) \ __aarch64_vdup_lane_any (u64, q, __a, __b) /* Internal macro for lane indices. */ #define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) #define __AARCH64_LANE_CHECK(__vec, __idx) \ __builtin_aarch64_im_lane_boundsi (sizeof(__vec), sizeof(__vec[0]), __idx) /* For big-endian, GCC's vector indices are the opposite way around to the architectural lane indices used by Neon intrinsics. */ #ifdef __AARCH64EB__ #define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx) #else #define __aarch64_lane(__vec, __idx) __idx #endif /* vget_lane internal macro. */ #define __aarch64_vget_lane_any(__vec, __index) \ __extension__ \ ({ \ __AARCH64_LANE_CHECK (__vec, __index); \ __vec[__aarch64_lane (__vec, __index)]; \ }) /* vset_lane and vld1_lane internal macro. 
*/ #define __aarch64_vset_lane_any(__elem, __vec, __index) \ __extension__ \ ({ \ __AARCH64_LANE_CHECK (__vec, __index); \ __vec[__aarch64_lane (__vec, __index)] = __elem; \ __vec; \ }) /* vadd */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { return __a + __b; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_s16 (int16x4_t __a, int16x4_t __b) { return __a + __b; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_s32 (int32x2_t __a, int32x2_t __b) { return __a + __b; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_f32 (float32x2_t __a, float32x2_t __b) { return __a + __b; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_f64 (float64x1_t __a, float64x1_t __b) { return __a + __b; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_u8 (uint8x8_t __a, uint8x8_t __b) { return __a + __b; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_u16 (uint16x4_t __a, uint16x4_t __b) { return __a + __b; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_u32 (uint32x2_t __a, uint32x2_t __b) { return __a + __b; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_s64 (int64x1_t __a, int64x1_t __b) { return __a + __b; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_u64 (uint64x1_t __a, uint64x1_t __b) { return __a + __b; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vaddq_s8 (int8x16_t __a, int8x16_t __b) { return __a + __b; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_s16 (int16x8_t __a, int16x8_t __b) { return __a + __b; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_s32 (int32x4_t __a, int32x4_t __b) { return __a + __b; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_s64 (int64x2_t __a, int64x2_t __b) { return __a + __b; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_f32 (float32x4_t __a, float32x4_t __b) { return __a + __b; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_f64 (float64x2_t __a, float64x2_t __b) { return __a + __b; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_u8 (uint8x16_t __a, uint8x16_t __b) { return __a + __b; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_u16 (uint16x8_t __a, uint16x8_t __b) { return __a + __b; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_u32 (uint32x4_t __a, uint32x4_t __b) { return __a + __b; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_u64 (uint64x2_t __a, uint64x2_t __b) { return __a + __b; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s8 (int8x8_t __a, int8x8_t __b) { return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s16 (int16x4_t __a, 
int16x4_t __b) { return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s32 (int32x2_t __a, int32x2_t __b) { return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u8 (uint8x8_t __a, uint8x8_t __b) { return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u16 (uint16x4_t __a, uint16x4_t __b) { return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u32 (uint32x2_t __a, uint32x2_t __b) { return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s8 (int8x16_t __a, int8x16_t __b) { return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s16 (int16x8_t __a, int16x8_t __b) { return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s32 (int32x4_t __a, int32x4_t __b) { return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) { return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) { return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) { return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s8 (int16x8_t __a, int8x8_t __b) { return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s16 (int32x4_t __a, int16x4_t __b) { return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s32 (int64x2_t __a, int32x2_t __b) { return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u8 (uint16x8_t __a, uint8x8_t __b) { return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a, (int8x8_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u16 (uint32x4_t __a, uint16x4_t __b) { return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u32 (uint64x2_t __a, uint32x2_t __b) { return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, (int32x2_t) __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s8 (int16x8_t __a, int8x16_t __b) { return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); } __extension__ extern __inline 
int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s16 (int32x4_t __a, int16x8_t __b) { return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s32 (int64x2_t __a, int32x4_t __b) { return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) { return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, (int8x16_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) { return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) { return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, (int32x4_t) __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s8 (int8x8_t __a, int8x8_t __b) { return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s16 (int16x4_t __a, int16x4_t __b) { return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s32 (int32x2_t __a, int32x2_t __b) { return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u8 (uint8x8_t __a, uint8x8_t __b) { return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, (int8x8_t) 
__b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u16 (uint16x4_t __a, uint16x4_t __b) { return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u32 (uint32x2_t __a, uint32x2_t __b) { return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s8 (int8x16_t __a, int8x16_t __b) { return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s16 (int16x8_t __a, int16x8_t __b) { return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s32 (int32x4_t __a, int32x4_t __b) { return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) { return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) { return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) { return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s8 (int8x8_t __a, int8x8_t __b) { 
return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s16 (int16x4_t __a, int16x4_t __b) { return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s32 (int32x2_t __a, int32x2_t __b) { return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) { return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) { return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) { return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s8 (int8x16_t __a, int8x16_t __b) { return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s16 (int16x8_t __a, int16x8_t __b) { return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s32 (int32x4_t __a, int32x4_t __b) { return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u8 (uint8x16_t __a, 
uint8x16_t __b) { return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a, (int8x16_t) __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) { return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) { return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s16 (int16x8_t __a, int16x8_t __b) { return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s32 (int32x4_t __a, int32x4_t __b) { return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s64 (int64x2_t __a, int64x2_t __b) { return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) { return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) { return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) { return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a, (int64x2_t) __b); } __extension__ extern __inline int8x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_s16 (int16x8_t __a, int16x8_t __b) { return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_s32 (int32x4_t __a, int32x4_t __b) { return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_s64 (int64x2_t __a, int64x2_t __b) { return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) { return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a, (int16x8_t) __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) { return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a, (int32x4_t) __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) { return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a, (int64x2_t) __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { return (int32x4_t) 
__builtin_aarch64_addhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
}

/* Unsigned 64-bit-source form of vraddhn_high: operands are cast to the
   signed vector types for __builtin_aarch64_raddhn2v2di and the result is
   cast back to uint32x4_t.  */
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a, (int64x2_t) __b, (int64x2_t) __c);
}

/* vdiv_<type> / vdivq_<type>: return __a / __b, written with the plain `/'
   operator on the vector operands (no builtin call).  */
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}

/* vmul_<type>: return __a * __b, written with the plain `*' operator on the
   vector operands.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_f64 (float64x1_t __a, float64x1_t __b)
{
return __a * __b;
}

/* Unsigned vmul_<type>: return __a * __b via the `*' operator.  */
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

/* The poly8 form does not use `*': it calls __builtin_aarch64_pmulv8qi with
   both operands cast to int8x8_t and casts the result back to poly8x8_t.
   NOTE(review): presumably a polynomial (carry-less) multiply -- confirm
   against the ACLE specification.  */
__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a, (int8x8_t) __b);
}

/* vmulq_<type>: same as vmul_<type> for the `q' vector types; return
   __a * __b via the `*' operator.  */
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ extern
__inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

/* poly8x16 multiply: calls __builtin_aarch64_pmulv16qi with both operands
   cast to int8x16_t and casts the result back to poly8x16_t.  */
__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a, (int8x16_t) __b);
}

/* vand_<type> / vandq_<type>: return __a & __b, written with the plain `&'
   operator on the vector operands.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int16x8_t
__attribute__
((__always_inline__, __gnu_inline__, __artificial__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

/* vandq_<type>: return __a & __b via the `&' operator.  */
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}

/* vorr_<type>: return __a | __b, written with the plain `|' operator on the
   vector operands.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

/* vorr_<type> / vorrq_<type>: return __a | __b via the `|' operator.  */
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__,
__artificial__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

/* veor_<type> / veorq_<type>: return __a ^ __b, written with the plain `^'
   operator on the vector operands.  */
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

/* Unsigned veorq_<type>: return __a ^ __b via the `^' operator.  */
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}

/* vbic_<type>: return __a & ~__b, i.e. __a with every bit that is set in
   __b cleared.  Note the operand order: the second operand is the one that
   is complemented.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbic_u64 (uint64x1_t __a, uint64x1_t
__b)
{
  return __a & ~__b;
}

/* vbicq_<type>: return __a & ~__b (the second operand is complemented
   before the AND).  */
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}

/* vorn_<type>: return __a | ~__b (the second operand is complemented
   before the OR).  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__,
__gnu_inline__, __artificial__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

/* vorn_<type> / vornq_<type>: return __a | ~__b (the second operand is
   complemented before the OR).  */
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}
/* vornq_u64: return __a | ~__b (the second operand is complemented before
   the OR).  */
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}

/* vsub_<type>: return __a - __b, written with the plain `-' operator on the
   vector operands (integer and floating-point forms alike).  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

/* vsubq_<type>: return __a - __b via the `-' operator.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}

/* vsubl_<type>: wrappers around __builtin_aarch64_ssubl<mode>; the declared
   return type is a different vector type from the operands (e.g. int8x8_t
   operands, int16x8_t result).
   NOTE(review): by name the ACLE "subtract long" (widening) intrinsics --
   confirm against the ACLE specification.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return
(int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

/* vsubl_<type>: signed forms call __builtin_aarch64_ssubl<mode> directly;
   unsigned forms call __builtin_aarch64_usubl<mode> with both operands cast
   to the signed vector types and cast the result back to the unsigned
   return type.
   NOTE(review): by name the ACLE "subtract long" (widening) intrinsics --
   confirm against the ACLE specification.  */
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a, (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a, (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a, (int32x2_t) __b);
}

/* vsubl_high_<type>: same shape as vsubl_<type> but taking the `q' vector
   types and calling the __builtin_aarch64_{s,u}subl2<mode> builtins.
   NOTE(review): presumably these operate on the upper halves of the
   operands -- confirm against the ACLE specification.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a, (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a, (int16x8_t) __b);
}

/* Unsigned 32-bit-source form of vsubl_high: operands are cast to int32x4_t
   for __builtin_aarch64_usubl2v4si and the result is cast back to
   uint64x2_t.  */
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a, (int32x4_t) __b);
}

/* vsubw_<type>: wrappers around __builtin_aarch64_{s,u}subw<mode>.  The two
   operands have different vector types (e.g. int16x8_t __a, int8x8_t __b)
   and the return type matches __a.  Unsigned forms cast both operands to
   the signed vector types for the call and cast the result back.
   NOTE(review): by name the ACLE "subtract wide" intrinsics -- confirm
   against the ACLE specification.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a, (int8x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a, (int16x4_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a, (int32x2_t) __b);
}

/* vsubw_high_<type>: as vsubw_<type>, but __b is a `q' vector type and the
   __builtin_aarch64_{s,u}subw2<mode> builtins are called.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

/* vsubw_high_<type>: wrappers around __builtin_aarch64_{s,u}subw2<mode>.
   The return type matches __a; unsigned forms cast both operands to the
   signed vector types for the call and cast the result back.  */
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a, (int8x16_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a, (int16x8_t) __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a, (int32x4_t) __b);
}

/* vqadd_<signed type>: wrappers around __builtin_aarch64_sqadd<mode>.
   NOTE(review): by name the ACLE saturating-add intrinsics -- confirm
   against the ACLE specification.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

/* The 64x1 form goes through the scalar builtin: element 0 of each operand
   is passed to __builtin_aarch64_sqadddi and the scalar result is wrapped
   back into a one-element vector with a compound literal.  */
__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
}

__extension__
extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
}

/* vhsub_<type> / vhsubq_<type>: wrappers around
   __builtin_aarch64_{s,u}hsub<mode>.  Signed forms forward the operands
   unchanged; unsigned forms cast both operands to the signed vector types
   for the call and cast the result back to the unsigned return type.
   NOTE(review): by name the ACLE "halving subtract" intrinsics -- confirm
   against the ACLE specification.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a, (int8x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a, (int16x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a, (int32x2_t) __b);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
}

/* Unsigned vhsubq_<type>: call __builtin_aarch64_uhsub<mode> with both
   operands cast to the signed vector types and cast the result back to the
   unsigned return type.  */
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a, (int8x16_t) __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a, (int16x8_t) __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a, (int32x4_t) __b);
}

/* vsubhn_<type>: two-operand wrappers around __builtin_aarch64_subhn<mode>.
   Signed and unsigned forms share the same builtins; the unsigned forms
   cast both operands to the signed vector types for the call and cast the
   result back.
   NOTE(review): by name the ACLE "subtract, narrow high half" intrinsics --
   confirm against the ACLE specification.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
(int32x4_t) __b);
}

/* Unsigned 64-bit-source form of vsubhn: operands are cast to int64x2_t for
   __builtin_aarch64_subhnv2di and the result is cast back to uint32x2_t.  */
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a, (int64x2_t) __b);
}

/* vrsubhn_<type>: two-operand wrappers around
   __builtin_aarch64_rsubhn<mode>.  Signed forms forward the operands
   unchanged; unsigned forms cast both operands to the signed vector types
   for the call and cast the result back.
   NOTE(review): by name the rounding counterpart of vsubhn -- confirm
   against the ACLE specification.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a, (int32x4_t) __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a, (int64x2_t) __b);
}

/* vrsubhn_high_<type>: three-operand wrappers around
   __builtin_aarch64_rsubhn2<mode>, called as (__a, __b, __c) and returning
   a vector with twice as many lanes as __a.  */
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsubhn_high_s32
(int16x4_t __a, int32x4_t __b, int32x4_t __c) { return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__))
vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
                                                    (int16x8_t) __b,
                                                    (int16x8_t) __c);
}

/* Unsigned vsubhn_high variants: every operand is cast to the signed vector
   type of the same shape before calling __builtin_aarch64_subhn2<mode>, and
   the result is cast back to the unsigned return type.  */
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
                                                    (int32x4_t) __b,
                                                    (int32x4_t) __c);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
                                                    (int64x2_t) __b,
                                                    (int64x2_t) __c);
}

/* vqadd_u16/u32: call the __builtin_aarch64_uqadd<mode>_uuu builtins
   directly on the unsigned operands; no casts are involved.  */
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
}

/* The one-element variant applies the scalar builtin to element 0 of each
   operand and wraps the result in a uint64x1_t initializer.  */
__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
}

/* vqaddq_s<N>: forward both operands to __builtin_aarch64_sqadd<mode> and
   cast the result to the declared return type.  */
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t)
__builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

/* vqaddq_u<N>: call the __builtin_aarch64_uqadd<mode>_uuu builtins directly
   on the unsigned operands; no casts are involved.  */
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
}

/* vqsub_s<N>: forward both operands to __builtin_aarch64_sqsub<mode> and
   cast the result to the declared return type.  The one-element s64 variant
   applies the scalar builtin to element 0 and wraps the result in an
   int64x1_t initializer.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
}

__extension__ extern __inline uint8x8_t
__attribute__
((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
}

/* vqsub_u<N>: call the __builtin_aarch64_uqsub<mode>_uuu builtins directly
   on the unsigned operands.  The one-element u64 variant applies the scalar
   builtin to element 0 and wraps the result in a uint64x1_t initializer.  */
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
}

/* vqsubq_s<N>: forward both operands to __builtin_aarch64_sqsub<mode> and
   cast the result to the declared return type.  */
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

/* vqsubq_u<N>: call the __builtin_aarch64_uqsub<mode>_uuu builtins directly
   on the unsigned operands; no casts are involved.  */
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t
__b)
{
  return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
}

/* vqneg_s<N> / vqnegq_s<N>: unary wrappers that pass __a to
   __builtin_aarch64_sqneg<mode> and cast the result to the declared return
   type.  The one-element s64 variant applies the scalar builtin to element
   0 and wraps the result in an int64x1_t initializer.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqneg_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqnegq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s8 (int8x8_t __a)
{
  return (int8x8_t)
__builtin_aarch64_sqabsv8qi (__a);
}

/* vqabs_s<N> / vqabsq_s<N>: unary wrappers that pass __a to
   __builtin_aarch64_sqabs<mode> and cast the result to the declared return
   type.  The one-element s64 variant applies the scalar builtin to element
   0 and wraps the result in an int64x1_t initializer.  */
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqabsq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
}

/* vqdmulh_s<N> / vqdmulhq_s<N>: forward both operands to
   __builtin_aarch64_sqdmulh<mode> and cast the result to the declared
   return type, which matches the operand type.  */
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
}

/* vqrdmulh_s<N> / vqrdmulhq_s<N>: forward both operands to
   __builtin_aarch64_sqrdmulh<mode> and cast the result to the declared
   return type, which matches the operand type.  */
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}

/* vcreate_<type>: produce a vector from the uint64_t argument by casting it
   to the destination vector type.  vcreate_s64 instead builds a one-element
   vector with a braced initializer.  */
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s8 (uint64_t __a)
{
  return (int8x8_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s16 (uint64_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s32 (uint64_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_s64 (uint64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f16 (uint64_t __a)
{
  return (float16x4_t) __a;
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f32 (uint64_t __a)
{
  return (float32x2_t) __a;
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__,
__gnu_inline__, __artificial__))
vcreate_u8 (uint64_t __a)
{
  return (uint8x8_t) __a;
}

/* vcreate_<type>: produce a vector from the uint64_t argument by casting it
   to the destination vector type.  vcreate_u64 instead builds a one-element
   vector with a braced initializer; the f64 and p64 variants use a cast.  */
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u16 (uint64_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u32 (uint64_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_f64 (uint64_t __a)
{
  return (float64x1_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p8 (uint64_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p16 (uint64_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcreate_p64 (uint64_t __a)
{
  return (poly64x1_t) __a;
}

/* vget_lane */

/* Each vget_lane_<type> wrapper returns
   __aarch64_vget_lane_any (__a, __b); that helper is defined outside this
   part of the file.  NOTE(review): __b is presumably the lane index and
   expected to be a constant in range -- confirm against the helper.  */
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f16 (float16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p8 (poly8x8_t __a,
const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vget_lane_<type> for the remaining element types: each returns
   __aarch64_vget_lane_any (__a, __b), a helper defined outside this part
   of the file.  The return type is the scalar type matching the vector's
   element type in the signature.  NOTE(review): __b is presumably the lane
   index -- confirm against the helper.  */
__extension__ extern __inline poly16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_p64 (poly64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vget_lane_u64 (uint64x1_t __a, const int
__b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vgetq_lane */

/* Each vgetq_lane_<type> wrapper returns
   __aarch64_vget_lane_any (__a, __b), the same helper the vget_lane
   wrappers use; it is defined outside this part of the file.
   NOTE(review): __b is presumably the lane index -- confirm against the
   helper.  */
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f16 (float16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline float64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline poly64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_p64 (poly64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_s64
(int64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* Unsigned vgetq_lane variants: each returns
   __aarch64_vget_lane_any (__a, __b), a helper defined outside this part
   of the file.  */
__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vreinterpret */

/* vreinterpret_p8_<src>: return the argument cast to poly8x8_t.  No builtin
   is called; the body is a single vector cast.  */
__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f16 (float16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f64 (float64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s8 (int8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s16 (int16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s32 (int32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_s64 (int64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__
((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_f32 (float32x2_t __a)
{
  return (poly8x8_t) __a;
}

/* vreinterpret_p8_<src>: return the argument cast to poly8x8_t.  No builtin
   is called; the body is a single vector cast.  */
__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u8 (uint8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u16 (uint16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u32 (uint32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_u64 (uint64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_p16 (poly16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p8_p64 (poly64x1_t __a)
{
  return (poly8x8_t) __a;
}

/* vreinterpretq_p8_<src>: return the argument cast to poly8x16_t.  */
__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f64 (float64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s8 (int8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s16 (int16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s32 (int32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_s64 (int64x2_t
__a)
{
  return (poly8x16_t) __a;
}

/* vreinterpretq_p8_<src>: return the argument cast to poly8x16_t.  No
   builtin is called; the body is a single cast.  */
__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f16 (float16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u8 (uint8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u16 (uint16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u32 (uint32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_u64 (uint64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p16 (poly16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p64 (poly64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p8_p128 (poly128_t __a)
{
  return (poly8x16_t)__a;
}

/* vreinterpret_p16_<src>: return the argument cast to poly16x4_t.  */
__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f16 (float16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f64 (float64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern
__inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s8 (int8x8_t __a)
{
  return (poly16x4_t) __a;
}

/* vreinterpret_p16_<src>: return the argument cast to poly16x4_t.  No
   builtin is called; the body is a single vector cast.  */
__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s16 (int16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s32 (int32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_s64 (int64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_f32 (float32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u8 (uint8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u16 (uint16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u32 (uint32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_u64 (uint64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_p8 (poly8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p16_p64 (poly64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__,
__artificial__))
vreinterpretq_p16_f64 (float64x2_t __a)
{
  return (poly16x8_t) __a;
}

/* vreinterpretq_p16_<src>: return the argument cast to poly16x8_t.  No
   builtin is called; the body is a single vector cast.  */
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s8 (int8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s16 (int16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s32 (int32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f16 (float16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u8 (uint8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u16 (uint16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u32 (uint32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_u64 (uint64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p8 (poly8x16_t __a)
{
  return (poly16x8_t) __a;
}

/* vreinterpretq_p16_<src>: return the argument cast to poly16x8_t.  */
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p64 (poly64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p16_p128 (poly128_t __a)
{
  return (poly16x8_t)__a;
}

/* vreinterpret_p64_<src>: return the argument cast to poly64x1_t.  No
   builtin is called; the body is a single vector cast.  */
__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f16 (float16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f64 (float64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s8 (int8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s16 (int16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s32 (int32x2_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_s64 (int64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_f32 (float32x2_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u8 (uint8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u16 (uint16x4_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline
poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u32 (uint32x2_t __a)
{
  return (poly64x1_t) __a;
}

/* vreinterpret_p64_<src>: return the argument cast to poly64x1_t.  */
__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_u64 (uint64x1_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_p8 (poly8x8_t __a)
{
  return (poly64x1_t) __a;
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_p64_p16 (poly16x4_t __a)
{
  return (poly64x1_t)__a;
}

/* vreinterpretq_p64_<src>: return the argument cast to poly64x2_t.  No
   builtin is called; the body is a single vector cast.  */
__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f64 (float64x2_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s8 (int8x16_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s16 (int16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s32 (int32x4_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_s64 (int64x2_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f16 (float16x8_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_p64_f32 (float32x4_t __a)
{
  return (poly64x2_t) __a;
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__,
__artificial__)) vreinterpretq_p64_p128 (poly128_t __a) { return (poly64x2_t)__a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_u8 (uint8x16_t __a) { return (poly64x2_t) __a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_u16 (uint16x8_t __a) { return (poly64x2_t) __a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_p16 (poly16x8_t __a) { return (poly64x2_t)__a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_u32 (uint32x4_t __a) { return (poly64x2_t) __a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_u64 (uint64x2_t __a) { return (poly64x2_t) __a; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p64_p8 (poly8x16_t __a) { return (poly64x2_t) __a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_p8 (poly8x16_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_p16 (poly16x8_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_f16 (float16x8_t __a) { return (poly128_t) __a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_f32 (float32x4_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_p64 (poly64x2_t __a) { 
return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_s64 (int64x2_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_u64 (uint64x2_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_s8 (int8x16_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_s16 (int16x8_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_s32 (int32x4_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_u8 (uint8x16_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_u16 (uint16x8_t __a) { return (poly128_t)__a; } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_p128_u32 (uint32x4_t __a) { return (poly128_t)__a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_f64 (float64x1_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_s8 (int8x8_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_s16 (int16x4_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_s32 (int32x2_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_s64 (int64x1_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_f32 (float32x2_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_u8 (uint8x8_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_u16 (uint16x4_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_u32 (uint32x2_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_u64 (uint64x1_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_p8 (poly8x8_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_p16 (poly16x4_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f16_p64 (poly64x1_t __a) { return (float16x4_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_f64 (float64x2_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vreinterpretq_f16_s8 (int8x16_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_s16 (int16x8_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_s32 (int32x4_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_s64 (int64x2_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_f32 (float32x4_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_u8 (uint8x16_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_u16 (uint16x8_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_u32 (uint32x4_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_u64 (uint64x2_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_p8 (poly8x16_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_p128 (poly128_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vreinterpretq_f16_p16 (poly16x8_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f16_p64 (poly64x2_t __a) { return (float16x8_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_f16 (float16x4_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_f64 (float64x1_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_s8 (int8x8_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_s16 (int16x4_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_s32 (int32x2_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_s64 (int64x1_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_u8 (uint8x8_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_u16 (uint16x4_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_u32 (uint32x2_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_u64 (uint64x1_t __a) { 
return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_p8 (poly8x8_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_p16 (poly16x4_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f32_p64 (poly64x1_t __a) { return (float32x2_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_f16 (float16x8_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_f64 (float64x2_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_s8 (int8x16_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_s16 (int16x8_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_s32 (int32x4_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_s64 (int64x2_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_u8 (uint8x16_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_u16 (uint16x8_t __a) { return (float32x4_t) __a; } 
__extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_u32 (uint32x4_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_u64 (uint64x2_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_p8 (poly8x16_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_p16 (poly16x8_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_p64 (poly64x2_t __a) { return (float32x4_t) __a; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f32_p128 (poly128_t __a) { return (float32x4_t)__a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_f16 (float16x4_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_f32 (float32x2_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_p8 (poly8x8_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_p16 (poly16x4_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_p64 (poly64x1_t __a) { return (float64x1_t) __a; } __extension__ extern __inline 
float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_s8 (int8x8_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_s16 (int16x4_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_s32 (int32x2_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_s64 (int64x1_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_u8 (uint8x8_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_u16 (uint16x4_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_u32 (uint32x2_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_f64_u64 (uint64x1_t __a) { return (float64x1_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_f16 (float16x8_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_f32 (float32x4_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_p8 (poly8x16_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_p16 (poly16x8_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_p64 (poly64x2_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_s8 (int8x16_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_s16 (int16x8_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_s32 (int32x4_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_s64 (int64x2_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_u8 (uint8x16_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_u16 (uint16x8_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_u32 (uint32x4_t __a) { return (float64x2_t) __a; } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_f64_u64 (uint64x2_t __a) { return (float64x2_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_f16 (float16x4_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vreinterpret_s64_f64 (float64x1_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_s8 (int8x8_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_s16 (int16x4_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_s32 (int32x2_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_f32 (float32x2_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_u8 (uint8x8_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_u16 (uint16x4_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_u32 (uint32x2_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_u64 (uint64x1_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_p8 (poly8x8_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_p16 (poly16x4_t __a) { return (int64x1_t) __a; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s64_p64 (poly64x1_t __a) { return (int64x1_t) __a; } 
__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_f64 (float64x2_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_s8 (int8x16_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_s16 (int16x8_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_s32 (int32x4_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_f16 (float16x8_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_f32 (float32x4_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_u8 (uint8x16_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_u16 (uint16x8_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_u32 (uint32x4_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_u64 (uint64x2_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_p8 (poly8x16_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vreinterpretq_s64_p16 (poly16x8_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_p64 (poly64x2_t __a) { return (int64x2_t) __a; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s64_p128 (poly128_t __a) { return (int64x2_t)__a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_f16 (float16x4_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_f64 (float64x1_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_s8 (int8x8_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_s16 (int16x4_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_s32 (int32x2_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_s64 (int64x1_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_f32 (float32x2_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_u8 (uint8x8_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_u16 (uint16x4_t __a) 
{ return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_u32 (uint32x2_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_p8 (poly8x8_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_p16 (poly16x4_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u64_p64 (poly64x1_t __a) { return (uint64x1_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_f64 (float64x2_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_s8 (int8x16_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_s16 (int16x8_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_s32 (int32x4_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_s64 (int64x2_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_f16 (float16x8_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_f32 (float32x4_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline 
uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_u8 (uint8x16_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_u16 (uint16x8_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_u32 (uint32x4_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_p8 (poly8x16_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_p16 (poly16x8_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_p64 (poly64x2_t __a) { return (uint64x2_t) __a; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u64_p128 (poly128_t __a) { return (uint64x2_t)__a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_f16 (float16x4_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_f64 (float64x1_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_s16 (int16x4_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_s32 (int32x2_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vreinterpret_s8_s64 (int64x1_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_f32 (float32x2_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_u8 (uint8x8_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_u16 (uint16x4_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_u32 (uint32x2_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_u64 (uint64x1_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_p8 (poly8x8_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_p16 (poly16x4_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s8_p64 (poly64x1_t __a) { return (int8x8_t) __a; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s8_f64 (float64x2_t __a) { return (int8x16_t) __a; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s8_s16 (int16x8_t __a) { return (int8x16_t) __a; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s8_s32 (int32x4_t __a) { return (int8x16_t) __a; } __extension__ extern __inline int8x16_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
  return (int8x16_t) __a;
}

/* vreinterpretq_s8_<src>

   Each function below returns its argument cast to int8x16_t; no other
   computation is performed.  The _p128 variant takes a poly128_t
   argument.  */

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f16 (float16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u8 (uint8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u16 (uint16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u32 (uint32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_u64 (uint64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p8 (poly8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p16 (poly16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p64 (poly64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s8_p128 (poly128_t __a)
{
  return (int8x16_t)__a;
}

/* vreinterpret_s16_<src>

   Each function below returns its argument cast to int16x4_t.  */

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f16 (float16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f64 (float64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s8 (int8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s32 (int32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_s64 (int64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_f32 (float32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u8 (uint8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u16 (uint16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u32 (uint32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_u64 (uint64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p8 (poly8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p16 (poly16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s16_p64 (poly64x1_t __a)
{
  return (int16x4_t) __a;
}

/* vreinterpretq_s16_<src>

   Each function below returns its argument cast to int16x8_t.  The _p128
   variant takes a poly128_t argument.  */

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f64 (float64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s8 (int8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s32 (int32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f16 (float16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u8 (uint8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u16 (uint16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u32 (uint32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_u64 (uint64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p8 (poly8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p16 (poly16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p64 (poly64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpretq_s16_p128 (poly128_t __a)
{
  return (int16x8_t)__a;
}

/* vreinterpret_s32_<src>

   Each function below returns its argument cast to int32x2_t.  */

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f16 (float16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f64 (float64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s8 (int8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s16 (int16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_s64 (int64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_f32 (float32x2_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u8 (uint8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vreinterpret_s32_u16 (uint16x4_t __a)
{
  return (int32x2_t) __a;
}
__extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s32_u32 (uint32x2_t __a) { return (int32x2_t) __a; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s32_u64 (uint64x1_t __a) { return (int32x2_t) __a; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s32_p8 (poly8x8_t __a) { return (int32x2_t) __a; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s32_p16 (poly16x4_t __a) { return (int32x2_t) __a; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_s32_p64 (poly64x1_t __a) { return (int32x2_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_f64 (float64x2_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_s8 (int8x16_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_s16 (int16x8_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_s64 (int64x2_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_f16 (float16x8_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_f32 (float32x4_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vreinterpretq_s32_u8 (uint8x16_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_u16 (uint16x8_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_u32 (uint32x4_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_u64 (uint64x2_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_p8 (poly8x16_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_p16 (poly16x8_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_p64 (poly64x2_t __a) { return (int32x4_t) __a; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_s32_p128 (poly128_t __a) { return (int32x4_t)__a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_f16 (float16x4_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_f64 (float64x1_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_s8 (int8x8_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_s16 (int16x4_t __a) { return 
(uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_s32 (int32x2_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_s64 (int64x1_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_f32 (float32x2_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_u16 (uint16x4_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_u32 (uint32x2_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_u64 (uint64x1_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_p8 (poly8x8_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_p16 (poly16x4_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u8_p64 (poly64x1_t __a) { return (uint8x8_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_f64 (float64x2_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_s8 (int8x16_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vreinterpretq_u8_s16 (int16x8_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_s32 (int32x4_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_s64 (int64x2_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_f16 (float16x8_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_f32 (float32x4_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_u16 (uint16x8_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_u32 (uint32x4_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_u64 (uint64x2_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_p8 (poly8x16_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_p16 (poly16x8_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_p64 (poly64x2_t __a) { return (uint8x16_t) __a; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u8_p128 (poly128_t 
__a) { return (uint8x16_t)__a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_f16 (float16x4_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_f64 (float64x1_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_s8 (int8x8_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_s16 (int16x4_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_s32 (int32x2_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_s64 (int64x1_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_f32 (float32x2_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_u8 (uint8x8_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_u32 (uint32x2_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_u64 (uint64x1_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_p8 (poly8x8_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline 
uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_p16 (poly16x4_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u16_p64 (poly64x1_t __a) { return (uint16x4_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_f64 (float64x2_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_s8 (int8x16_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_s16 (int16x8_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_s32 (int32x4_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_s64 (int64x2_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_f16 (float16x8_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_f32 (float32x4_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_u8 (uint8x16_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_u32 (uint32x4_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vreinterpretq_u16_u64 (uint64x2_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_p8 (poly8x16_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_p16 (poly16x8_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_p64 (poly64x2_t __a) { return (uint16x8_t) __a; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u16_p128 (poly128_t __a) { return (uint16x8_t)__a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_f16 (float16x4_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_f64 (float64x1_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_s8 (int8x8_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_s16 (int16x4_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_s32 (int32x2_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_s64 (int64x1_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_f32 
(float32x2_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_u8 (uint8x8_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_u16 (uint16x4_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_u64 (uint64x1_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_p8 (poly8x8_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_p16 (poly16x4_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpret_u32_p64 (poly64x1_t __a) { return (uint32x2_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_f64 (float64x2_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_s8 (int8x16_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_s16 (int16x8_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_s32 (int32x4_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_s64 (int64x2_t __a) { return (uint32x4_t) __a; } __extension__ 
extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_f16 (float16x8_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_f32 (float32x4_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_u8 (uint8x16_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_u16 (uint16x8_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_u64 (uint64x2_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_p8 (poly8x16_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_p16 (poly16x8_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_p64 (poly64x2_t __a) { return (uint32x4_t) __a; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vreinterpretq_u32_p128 (poly128_t __a) { return (uint32x4_t)__a; } /* vset_lane */ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_f32 (float32_t __elem, float32x2_t __vec, 
const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_p64 (poly64_t __elem, poly64x1_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern 
__inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } /* vsetq_lane */ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_p64 (poly64_t __elem, poly64x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int 
__index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) { return __aarch64_vset_lane_any (__elem, __vec, __index); } #define __GET_LOW(__TYPE) \ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \ return vreinterpret_##__TYPE##_u64 (lo); __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f16 (float16x8_t __a) { __GET_LOW (f16); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f32 (float32x4_t __a) { __GET_LOW (f32); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_f64 (float64x2_t __a) { return (float64x1_t) {vgetq_lane_f64 (__a, 0)}; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p8 (poly8x16_t __a) { __GET_LOW (p8); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p16 (poly16x8_t __a) { __GET_LOW (p16); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_p64 (poly64x2_t __a) { __GET_LOW (p64); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s8 (int8x16_t __a) { __GET_LOW (s8); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s16 (int16x8_t __a) { __GET_LOW (s16); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s32 (int32x4_t __a) { __GET_LOW (s32); } __extension__ extern __inline int64x1_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_s64 (int64x2_t __a) { __GET_LOW (s64); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u8 (uint8x16_t __a) { __GET_LOW (u8); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u16 (uint16x8_t __a) { __GET_LOW (u16); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u32 (uint32x4_t __a) { __GET_LOW (u32); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_low_u64 (uint64x2_t __a) { return vcreate_u64 (vgetq_lane_u64 (__a, 0)); } #undef __GET_LOW #define __GET_HIGH(__TYPE) \ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ return vreinterpret_##__TYPE##_u64 (hi); __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f16 (float16x8_t __a) { __GET_HIGH (f16); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f32 (float32x4_t __a) { __GET_HIGH (f32); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f64 (float64x2_t __a) { __GET_HIGH (f64); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p8 (poly8x16_t __a) { __GET_HIGH (p8); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p16 (poly16x8_t __a) { __GET_HIGH (p16); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p64 (poly64x2_t __a) { __GET_HIGH (p64); } __extension__ extern __inline int8x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s8 (int8x16_t __a) { __GET_HIGH (s8); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s16 (int16x8_t __a) { __GET_HIGH (s16); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s32 (int32x4_t __a) { __GET_HIGH (s32); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s64 (int64x2_t __a) { __GET_HIGH (s64); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u8 (uint8x16_t __a) { __GET_HIGH (u8); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u16 (uint16x8_t __a) { __GET_HIGH (u16); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u32 (uint32x4_t __a) { __GET_HIGH (u32); } #undef __GET_HIGH __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u64 (uint64x2_t __a) { return vcreate_u64 (vgetq_lane_u64 (__a, 1)); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s16 (int16x4_t __a, int16x4_t __b) { return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s32 (int32x2_t __a, int32x2_t __b) { return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s64 (int64x1_t __a, int64x1_t __b) { return __builtin_aarch64_combinedi (__a[0], __b[0]); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_f16 (float16x4_t __a, float16x4_t __b) { return __builtin_aarch64_combinev4hf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_f32 (float32x2_t __a, float32x2_t __b) { return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u8 (uint8x8_t __a, uint8x8_t __b) { return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, (int8x8_t) __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u16 (uint16x4_t __a, uint16x4_t __b) { return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u32 (uint32x2_t __a, uint32x2_t __b) { return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, (int32x2_t) __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u64 (uint64x1_t __a, uint64x1_t __b) { return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_f64 (float64x1_t __a, float64x1_t __b) { return __builtin_aarch64_combinedf (__a[0], __b[0]); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p8 (poly8x8_t __a, poly8x8_t __b) { return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, (int8x8_t) __b); 
} __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p16 (poly16x4_t __a, poly16x4_t __b) { return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, (int16x4_t) __b); } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p64 (poly64x1_t __a, poly64x1_t __b) { return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]); } /* Start of temporary inline asm implementations. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) { int8x8_t result; __asm__ ("saba %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) { int16x4_t result; __asm__ ("saba %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) { int32x2_t result; __asm__ ("saba %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t result; __asm__ ("uaba %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) { uint16x4_t result; __asm__ ("uaba %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern 
__inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) { uint32x2_t result; __asm__ ("uaba %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) { int16x8_t result; __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) { int32x4_t result; __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) { int64x2_t result; __asm__ ("sabal2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) { uint16x8_t result; __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) { uint32x4_t result; __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) { uint64x2_t result; __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" 
: "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) { int16x8_t result; __asm__ ("sabal %0.8h,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) { int32x4_t result; __asm__ ("sabal %0.4s,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) { int64x2_t result; __asm__ ("sabal %0.2d,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) { uint16x8_t result; __asm__ ("uabal %0.8h,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) { uint32x4_t result; __asm__ ("uabal %0.4s,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) { uint64x2_t result; __asm__ ("uabal %0.2d,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) { 
int8x16_t result; __asm__ ("saba %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) { int16x8_t result; __asm__ ("saba %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) { int32x4_t result; __asm__ ("saba %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) { uint8x16_t result; __asm__ ("uaba %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) { uint16x8_t result; __asm__ ("uaba %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) { uint32x4_t result; __asm__ ("uaba %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_s8 (int8x8_t a, int8x8_t b) { int8x8_t result; __asm__ ("sabd %0.8b, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_s16 (int16x4_t 
a, int16x4_t b) { int16x4_t result; __asm__ ("sabd %0.4h, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_s32 (int32x2_t a, int32x2_t b) { int32x2_t result; __asm__ ("sabd %0.2s, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_u8 (uint8x8_t a, uint8x8_t b) { uint8x8_t result; __asm__ ("uabd %0.8b, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_u16 (uint16x4_t a, uint16x4_t b) { uint16x4_t result; __asm__ ("uabd %0.4h, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_u32 (uint32x2_t a, uint32x2_t b) { uint32x2_t result; __asm__ ("uabd %0.2s, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_s8 (int8x16_t a, int8x16_t b) { int16x8_t result; __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_s16 (int16x8_t a, int16x8_t b) { int32x4_t result; __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_s32 (int32x4_t a, int32x4_t b) { int64x2_t result; __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" : 
"=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_u8 (uint8x16_t a, uint8x16_t b) { uint16x8_t result; __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_u16 (uint16x8_t a, uint16x8_t b) { uint32x4_t result; __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_high_u32 (uint32x4_t a, uint32x4_t b) { uint64x2_t result; __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_s8 (int8x8_t a, int8x8_t b) { int16x8_t result; __asm__ ("sabdl %0.8h, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_s16 (int16x4_t a, int16x4_t b) { int32x4_t result; __asm__ ("sabdl %0.4s, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_s32 (int32x2_t a, int32x2_t b) { int64x2_t result; __asm__ ("sabdl %0.2d, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_u8 (uint8x8_t a, uint8x8_t b) { uint16x8_t result; __asm__ ("uabdl %0.8h, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return 
result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_u16 (uint16x4_t a, uint16x4_t b) { uint32x4_t result; __asm__ ("uabdl %0.4s, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdl_u32 (uint32x2_t a, uint32x2_t b) { uint64x2_t result; __asm__ ("uabdl %0.2d, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_s8 (int8x16_t a, int8x16_t b) { int8x16_t result; __asm__ ("sabd %0.16b, %1.16b, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_s16 (int16x8_t a, int16x8_t b) { int16x8_t result; __asm__ ("sabd %0.8h, %1.8h, %2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_s32 (int32x4_t a, int32x4_t b) { int32x4_t result; __asm__ ("sabd %0.4s, %1.4s, %2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_u8 (uint8x16_t a, uint8x16_t b) { uint8x16_t result; __asm__ ("uabd %0.16b, %1.16b, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_u16 (uint16x8_t a, uint16x8_t b) { uint16x8_t result; __asm__ ("uabd %0.8h, %1.8h, %2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vabdq_u32 (uint32x4_t a, uint32x4_t b) { uint32x4_t result; __asm__ ("uabd %0.4s, %1.4s, %2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_s8 (int8x8_t a) { int16_t result; __asm__ ("saddlv %h0,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_s16 (int16x4_t a) { int32_t result; __asm__ ("saddlv %s0,%1.4h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_u8 (uint8x8_t a) { uint16_t result; __asm__ ("uaddlv %h0,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_u16 (uint16x4_t a) { uint32_t result; __asm__ ("uaddlv %s0,%1.4h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlvq_s8 (int8x16_t a) { int16_t result; __asm__ ("saddlv %h0,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlvq_s16 (int16x8_t a) { int32_t result; __asm__ ("saddlv %s0,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlvq_s32 (int32x4_t a) { int64_t result; __asm__ ("saddlv %d0,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vaddlvq_u8 (uint8x16_t a) { uint16_t result; __asm__ ("uaddlv %h0,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlvq_u16 (uint16x8_t a) { uint32_t result; __asm__ ("uaddlv %s0,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlvq_u32 (uint32x4_t a) { uint64_t result; __asm__ ("uaddlv %d0,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtx_f32_f64 (float64x2_t a) { float32x2_t result; __asm__ ("fcvtxn %0.2s,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) { float32x4_t result; __asm__ ("fcvtxn2 %0.4s,%1.2d" : "=w"(result) : "w" (b), "0"(a) : /* No clobbers */); return result; } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtxd_f32_f64 (float64_t a) { float32_t result; __asm__ ("fcvtxn %s0,%d1" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) { float32x2_t result; float32x2_t t1; __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" : "=w"(result), "=w"(t1) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) { int16x4_t result; __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : 
"=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) { int32x2_t result; __asm__ ("mla %0.2s,%2.2s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) { uint16x4_t result; __asm__ ("mla %0.4h,%2.4h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) { uint32x2_t result; __asm__ ("mla %0.2s,%2.2s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) { int8x8_t result; __asm__ ("mla %0.8b, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) { int16x4_t result; __asm__ ("mla %0.4h, %2.4h, %3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) { int32x2_t result; __asm__ ("mla %0.2s, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t 
result; __asm__ ("mla %0.8b, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) { uint16x4_t result; __asm__ ("mla %0.4h, %2.4h, %3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) { uint32x2_t result; __asm__ ("mla %0.2s, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } #define vmlal_high_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x4_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x2_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x4_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x2_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t 
result; \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_high_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) { int32x4_t result; __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) { int64x2_t result; __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) { uint32x4_t result; __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) { uint64x2_t result; __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) { int16x8_t result; __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) { int32x4_t result; __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) { int64x2_t result; __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) { uint16x8_t result; __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) { uint32x4_t result; __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) { uint64x2_t result; __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* 
No clobbers */); return result; } #define vmlal_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x4_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x2_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x4_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x2_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlal_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define 
vmlal_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) { int32x4_t result; __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) { int64x2_t result; __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) { uint32x4_t result; __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) { uint64x2_t result; __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) { int16x8_t result; __asm__ ("smlal %0.8h,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) { int32x4_t result; __asm__ ("smlal %0.4s,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), 
"w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) { int64x2_t result; __asm__ ("smlal %0.2d,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) { uint16x8_t result; __asm__ ("umlal %0.8h,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) { uint32x4_t result; __asm__ ("umlal %0.4s,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) { uint64x2_t result; __asm__ ("umlal %0.2d,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) { float32x4_t result; float32x4_t t1; __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" : "=w"(result), "=w"(t1) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) { int16x8_t result; __asm__ ("mla %0.8h,%2.8h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_s32 
(int32x4_t a, int32x4_t b, int32_t c) { int32x4_t result; __asm__ ("mla %0.4s,%2.4s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) { uint16x8_t result; __asm__ ("mla %0.8h,%2.8h,%3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) { uint32x4_t result; __asm__ ("mla %0.4s,%2.4s,%3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) { int8x16_t result; __asm__ ("mla %0.16b, %2.16b, %3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) { int16x8_t result; __asm__ ("mla %0.8h, %2.8h, %3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) { int32x4_t result; __asm__ ("mla %0.4s, %2.4s, %3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) { uint8x16_t result; __asm__ ("mla %0.16b, %2.16b, %3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) { uint16x8_t result; __asm__ ("mla %0.8h, %2.8h, %3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) { uint32x4_t result; __asm__ ("mla %0.4s, %2.4s, %3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) { float32x2_t result; float32x2_t t1; __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" : "=w"(result), "=w"(t1) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) { int16x4_t result; __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) { int32x2_t result; __asm__ ("mls %0.2s, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) { uint16x4_t result; __asm__ ("mls %0.4h, %2.4h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) { uint32x2_t result; __asm__ ("mls %0.2s, %2.2s, %3.s[0]" : 
"=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) { int8x8_t result; __asm__ ("mls %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) { int16x4_t result; __asm__ ("mls %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) { int32x2_t result; __asm__ ("mls %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t result; __asm__ ("mls %0.8b,%2.8b,%3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) { uint16x4_t result; __asm__ ("mls %0.4h,%2.4h,%3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) { uint32x2_t result; __asm__ ("mls %0.2s,%2.2s,%3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } #define vmlsl_high_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x4_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlsl2 %0.4s, %2.8h, 
%3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x2_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x4_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x2_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x8_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x4_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x8_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_high_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x4_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlsl2 
%0.2d, %2.4s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) { int32x4_t result; __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) { int64x2_t result; __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) { uint32x4_t result; __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) { uint64x2_t result; __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) { int16x8_t result; __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) { int32x4_t result; __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) { int64x2_t result; __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) { uint16x8_t result; __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) { uint32x4_t result; __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) { uint64x2_t result; __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } #define vmlsl_lane_s16(a, b, c, d) \ __extension__ \ ({ \ int16x4_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_lane_s32(a, b, c, d) \ __extension__ \ ({ \ int32x2_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_lane_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x4_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define 
vmlsl_lane_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x2_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_laneq_s16(a, b, c, d) \ __extension__ \ ({ \ int16x8_t c_ = (c); \ int16x4_t b_ = (b); \ int32x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_laneq_s32(a, b, c, d) \ __extension__ \ ({ \ int32x4_t c_ = (c); \ int32x2_t b_ = (b); \ int64x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_laneq_u16(a, b, c, d) \ __extension__ \ ({ \ uint16x8_t c_ = (c); \ uint16x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) #define vmlsl_laneq_u32(a, b, c, d) \ __extension__ \ ({ \ uint32x4_t c_ = (c); \ uint32x2_t b_ = (b); \ uint64x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ : "=w"(result) \ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) { int32x4_t result; __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) { int64x2_t result; __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } 
__extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) { uint32x4_t result; __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) { uint64x2_t result; __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) { int16x8_t result; __asm__ ("smlsl %0.8h, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) { int32x4_t result; __asm__ ("smlsl %0.4s, %2.4h, %3.4h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) { int64x2_t result; __asm__ ("smlsl %0.2d, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) { uint16x8_t result; __asm__ ("umlsl %0.8h, %2.8b, %3.8b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) { uint32x4_t result; __asm__ ("umlsl %0.4s, %2.4h, %3.4h" : 
"=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) { uint64x2_t result; __asm__ ("umlsl %0.2d, %2.2s, %3.2s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) { float32x4_t result; float32x4_t t1; __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" : "=w"(result), "=w"(t1) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) { int16x8_t result; __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) { int32x4_t result; __asm__ ("mls %0.4s, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) { uint16x8_t result; __asm__ ("mls %0.8h, %2.8h, %3.h[0]" : "=w"(result) : "0"(a), "w"(b), "x"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) { uint32x4_t result; __asm__ ("mls %0.4s, %2.4s, %3.s[0]" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) { int8x16_t result; __asm__ ("mls %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) { int16x8_t result; __asm__ ("mls %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) { int32x4_t result; __asm__ ("mls %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) { uint8x16_t result; __asm__ ("mls %0.16b,%2.16b,%3.16b" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) { uint16x8_t result; __asm__ ("mls %0.8h,%2.8h,%3.8h" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) { uint32x4_t result; __asm__ ("mls %0.4s,%2.4s,%3.4s" : "=w"(result) : "0"(a), "w"(b), "w"(c) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s8 (int8x16_t a) { int16x8_t result; __asm__ ("sshll2 %0.8h,%1.16b,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s16 (int16x8_t a) { int32x4_t result; __asm__ ("sshll2 %0.4s,%1.8h,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s32 (int32x4_t a) { int64x2_t result; __asm__ ("sshll2 %0.2d,%1.4s,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u8 (uint8x16_t a) { uint16x8_t result; __asm__ ("ushll2 %0.8h,%1.16b,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u16 (uint16x8_t a) { uint32x4_t result; __asm__ ("ushll2 %0.4s,%1.8h,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u32 (uint32x4_t a) { uint64x2_t result; __asm__ ("ushll2 %0.2d,%1.4s,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_s8 (int8x8_t a) { int16x8_t result; __asm__ ("sshll %0.8h,%1.8b,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_s16 (int16x4_t a) { int32x4_t result; __asm__ ("sshll %0.4s,%1.4h,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_s32 (int32x2_t a) { int64x2_t result; __asm__ ("sshll %0.2d,%1.2s,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_u8 (uint8x8_t a) { uint16x8_t result; __asm__ ("ushll %0.8h,%1.8b,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_u16 (uint16x4_t a) { uint32x4_t result; __asm__ ("ushll %0.4s,%1.4h,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_u32 (uint32x2_t a) { uint64x2_t result; __asm__ ("ushll %0.2d,%1.2s,#0" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_s16 (int8x8_t a, int16x8_t b) { int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_s32 (int16x4_t a, int32x4_t b) { int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_s64 (int32x2_t a, int64x2_t b) { int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u16 (uint8x8_t a, uint16x8_t b) { uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } 
__extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u32 (uint16x4_t a, uint32x4_t b) { uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u64 (uint32x2_t a, uint64x2_t b) { uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_s16 (int16x8_t a) { int8x8_t result; __asm__ ("xtn %0.8b,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_s32 (int32x4_t a) { int16x4_t result; __asm__ ("xtn %0.4h,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_s64 (int64x2_t a) { int32x2_t result; __asm__ ("xtn %0.2s,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_u16 (uint16x8_t a) { uint8x8_t result; __asm__ ("xtn %0.8b,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_u32 (uint32x4_t a) { uint16x4_t result; __asm__ ("xtn %0.4h,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_u64 (uint64x2_t a) { 
uint32x2_t result; __asm__ ("xtn %0.2s,%1.2d" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } #define vmull_high_lane_s16(a, b, c) \ __extension__ \ ({ \ int16x4_t b_ = (b); \ int16x8_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_lane_s32(a, b, c) \ __extension__ \ ({ \ int32x2_t b_ = (b); \ int32x4_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_lane_u16(a, b, c) \ __extension__ \ ({ \ uint16x4_t b_ = (b); \ uint16x8_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_lane_u32(a, b, c) \ __extension__ \ ({ \ uint32x2_t b_ = (b); \ uint32x4_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_laneq_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int16x8_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_laneq_s32(a, b, c) \ __extension__ \ ({ \ int32x4_t b_ = (b); \ int32x4_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_laneq_u16(a, b, c) \ __extension__ \ ({ \ uint16x8_t b_ = (b); \ uint16x8_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_high_laneq_u32(a, b, c) \ __extension__ \ ({ \ uint32x4_t b_ = (b); \ uint32x4_t a_ = (a); \ uint64x2_t result; \ 
__asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_n_s16 (int16x8_t a, int16_t b) { int32x4_t result; __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_n_s32 (int32x4_t a, int32_t b) { int64x2_t result; __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_n_u16 (uint16x8_t a, uint16_t b) { uint32x4_t result; __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_n_u32 (uint32x4_t a, uint32_t b) { uint64x2_t result; __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_p8 (poly8x16_t a, poly8x16_t b) { poly16x8_t result; __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_s8 (int8x16_t a, int8x16_t b) { int16x8_t result; __asm__ ("smull2 %0.8h,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_s16 (int16x8_t a, int16x8_t b) { int32x4_t result; __asm__ ("smull2 
%0.4s,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_s32 (int32x4_t a, int32x4_t b) { int64x2_t result; __asm__ ("smull2 %0.2d,%1.4s,%2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_u8 (uint8x16_t a, uint8x16_t b) { uint16x8_t result; __asm__ ("umull2 %0.8h,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_u16 (uint16x8_t a, uint16x8_t b) { uint32x4_t result; __asm__ ("umull2 %0.4s,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_u32 (uint32x4_t a, uint32x4_t b) { uint64x2_t result; __asm__ ("umull2 %0.2d,%1.4s,%2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } #define vmull_lane_s16(a, b, c) \ __extension__ \ ({ \ int16x4_t b_ = (b); \ int16x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_lane_s32(a, b, c) \ __extension__ \ ({ \ int32x2_t b_ = (b); \ int32x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_lane_u16(a, b, c) \ __extension__ \ ({ \ uint16x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_lane_u32(a, b, c) \ __extension__ \ ({ \ 
uint32x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_laneq_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int16x4_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_laneq_s32(a, b, c) \ __extension__ \ ({ \ int32x4_t b_ = (b); \ int32x2_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_laneq_u16(a, b, c) \ __extension__ \ ({ \ uint16x8_t b_ = (b); \ uint16x4_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ : "=w"(result) \ : "w"(a_), "x"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vmull_laneq_u32(a, b, c) \ __extension__ \ ({ \ uint32x4_t b_ = (b); \ uint32x2_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ : "=w"(result) \ : "w"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_n_s16 (int16x4_t a, int16_t b) { int32x4_t result; __asm__ ("smull %0.4s,%1.4h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_n_s32 (int32x2_t a, int32_t b) { int64x2_t result; __asm__ ("smull %0.2d,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_n_u16 (uint16x4_t a, uint16_t b) { uint32x4_t result; __asm__ ("umull %0.4s,%1.4h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; 
} __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_n_u32 (uint32x2_t a, uint32_t b) { uint64x2_t result; __asm__ ("umull %0.2d,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_p8 (poly8x8_t a, poly8x8_t b) { poly16x8_t result; __asm__ ("pmull %0.8h, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_s8 (int8x8_t a, int8x8_t b) { int16x8_t result; __asm__ ("smull %0.8h, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_s16 (int16x4_t a, int16x4_t b) { int32x4_t result; __asm__ ("smull %0.4s, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_s32 (int32x2_t a, int32x2_t b) { int64x2_t result; __asm__ ("smull %0.2d, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_u8 (uint8x8_t a, uint8x8_t b) { uint16x8_t result; __asm__ ("umull %0.8h, %1.8b, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_u16 (uint16x4_t a, uint16x4_t b) { uint32x4_t result; __asm__ ("umull %0.4s, %1.4h, %2.4h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vmull_u32 (uint32x2_t a, uint32x2_t b) { uint64x2_t result; __asm__ ("umull %0.2d, %1.2s, %2.2s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_s8 (int16x4_t a, int8x8_t b) { int16x4_t result; __asm__ ("sadalp %0.4h,%2.8b" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_s16 (int32x2_t a, int16x4_t b) { int32x2_t result; __asm__ ("sadalp %0.2s,%2.4h" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_s32 (int64x1_t a, int32x2_t b) { int64x1_t result; __asm__ ("sadalp %0.1d,%2.2s" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_u8 (uint16x4_t a, uint8x8_t b) { uint16x4_t result; __asm__ ("uadalp %0.4h,%2.8b" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_u16 (uint32x2_t a, uint16x4_t b) { uint32x2_t result; __asm__ ("uadalp %0.2s,%2.4h" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_u32 (uint64x1_t a, uint32x2_t b) { uint64x1_t result; __asm__ ("uadalp %0.1d,%2.2s" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_s8 (int16x8_t a, int8x16_t b) { int16x8_t result; __asm__ ("sadalp 
%0.8h,%2.16b" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_s16 (int32x4_t a, int16x8_t b) { int32x4_t result; __asm__ ("sadalp %0.4s,%2.8h" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_s32 (int64x2_t a, int32x4_t b) { int64x2_t result; __asm__ ("sadalp %0.2d,%2.4s" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_u8 (uint16x8_t a, uint8x16_t b) { uint16x8_t result; __asm__ ("uadalp %0.8h,%2.16b" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_u16 (uint32x4_t a, uint16x8_t b) { uint32x4_t result; __asm__ ("uadalp %0.4s,%2.8h" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadalq_u32 (uint64x2_t a, uint32x4_t b) { uint64x2_t result; __asm__ ("uadalp %0.2d,%2.4s" : "=w"(result) : "0"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s8 (int8x8_t a) { int16x4_t result; __asm__ ("saddlp %0.4h,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s16 (int16x4_t a) { int32x2_t result; __asm__ ("saddlp %0.2s,%1.4h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int64x1_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s32 (int32x2_t a) { int64x1_t result; __asm__ ("saddlp %0.1d,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u8 (uint8x8_t a) { uint16x4_t result; __asm__ ("uaddlp %0.4h,%1.8b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u16 (uint16x4_t a) { uint32x2_t result; __asm__ ("uaddlp %0.2s,%1.4h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u32 (uint32x2_t a) { uint64x1_t result; __asm__ ("uaddlp %0.1d,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s8 (int8x16_t a) { int16x8_t result; __asm__ ("saddlp %0.8h,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s16 (int16x8_t a) { int32x4_t result; __asm__ ("saddlp %0.4s,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s32 (int32x4_t a) { int64x2_t result; __asm__ ("saddlp %0.2d,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u8 (uint8x16_t a) { uint16x8_t result; __asm__ ("uaddlp %0.8h,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u16 (uint16x8_t a) { uint32x4_t result; __asm__ ("uaddlp %0.4s,%1.8h" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u32 (uint32x4_t a) { uint64x2_t result; __asm__ ("uaddlp %0.2d,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s8 (int8x16_t a, int8x16_t b) { int8x16_t result; __asm__ ("addp %0.16b,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s16 (int16x8_t a, int16x8_t b) { int16x8_t result; __asm__ ("addp %0.8h,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s32 (int32x4_t a, int32x4_t b) { int32x4_t result; __asm__ ("addp %0.4s,%1.4s,%2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s64 (int64x2_t a, int64x2_t b) { int64x2_t result; __asm__ ("addp %0.2d,%1.2d,%2.2d" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u8 (uint8x16_t a, uint8x16_t b) { uint8x16_t result; __asm__ ("addp %0.16b,%1.16b,%2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u16 (uint16x8_t a, uint16x8_t b) { uint16x8_t result; __asm__ ("addp 
%0.8h,%1.8h,%2.8h" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u32 (uint32x4_t a, uint32x4_t b) { uint32x4_t result; __asm__ ("addp %0.4s,%1.4s,%2.4s" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u64 (uint64x2_t a, uint64x2_t b) { uint64x2_t result; __asm__ ("addp %0.2d,%1.2d,%2.2d" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_n_s16 (int16x4_t a, int16_t b) { int16x4_t result; __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_n_s32 (int32x2_t a, int32_t b) { int32x2_t result; __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_n_s16 (int16x8_t a, int16_t b) { int16x8_t result; __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_n_s32 (int32x4_t a, int32_t b) { int32x4_t result; __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_s16 (int8x8_t a, int16x8_t b) { int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 
%0.16b, %1.8h" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_s32 (int16x4_t a, int32x4_t b) { int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_s64 (int32x2_t a, int64x2_t b) { int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) { uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.16b, %1.8h" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) { uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) { uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_high_s16 (uint8x8_t a, int16x8_t b) { uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.16b, %1.8h" : "+w"(result) : "w"(b) : 
/* No clobbers */); return result; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_high_s32 (uint16x4_t a, int32x4_t b) { uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.8h, %1.4s" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_high_s64 (uint32x2_t a, int64x2_t b) { uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.4s, %1.2d" : "+w"(result) : "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_n_s16 (int16x4_t a, int16_t b) { int16x4_t result; __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_n_s32 (int32x2_t a, int32_t b) { int32x2_t result; __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_n_s16 (int16x8_t a, int16_t b) { int16x8_t result; __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" : "=w"(result) : "w"(a), "x"(b) : /* No clobbers */); return result; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_n_s32 (int32x4_t a, int32_t b) { int32x4_t result; __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } #define vqrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ (a_, vcreate_s8 \ (__AARCH64_UINT64_C (0x0))); \ 
__asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* Shared shape of the *_high_n_* macros below (a, b, c): RESULT is
   first built as A in the low half combined with a zero high half,
   then the named "...2" instruction narrows B, shifted right by C,
   into the high half.  RESULT is a read-write ("+w") operand so the
   low half set up beforehand is carried through the asm.  A and B are
   each evaluated exactly once (B first); C is passed with the "i"
   constraint and must be an integer constant expression.  */

/* vqrshrn_high_n_<t>: SQRSHRN2 (signed) / UQRSHRN2 (unsigned).  */
#define vqrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vqrshrun_high_n_<t>: SQRSHRUN2; B is signed, A and the result are
   unsigned.  */
#define vqrshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vqshrn_high_n_<t>: SQSHRN2 (signed) / UQSHRN2 (unsigned).  */
#define vqshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
: /* No clobbers */); \
       result; \
     })

/* Shared shape of the *_high_n_* macros below (a, b, c): RESULT is
   first built as A in the low half combined with a zero high half,
   then the named "...2" instruction narrows B, shifted right by C,
   into the high half.  RESULT is a read-write ("+w") operand so the
   low half set up beforehand is carried through the asm.  A and B are
   each evaluated exactly once (B first); C is passed with the "i"
   constraint and must be an integer constant expression.  */

/* vqshrn_high_n_u64: UQSHRN2, 2 x u64 narrowed into the high half of
   4 x u32.  */
#define vqshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vqshrun_high_n_<t>: SQSHRUN2; B is signed, A and the result are
   unsigned.  */
#define vqshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vrshrn_high_n_<t>: RSHRN2.  The signed and unsigned variants use
   the same instruction and differ only in their vector types.  */
#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
(__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vrshrn_high_n_u16(a, b, c) \ __extension__ \ ({ \ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ (a_, vcreate_u8 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vrshrn_high_n_u32(a, b, c) \ __extension__ \ ({ \ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ (a_, vcreate_u16 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vrshrn_high_n_u64(a, b, c) \ __extension__ \ ({ \ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ (a_, vcreate_u32 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vrshrn_n_s16(a, b) \ __extension__ \ ({ \ int16x8_t a_ = (a); \ int8x8_t result; \ __asm__ ("rshrn %0.8b,%1.8h,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) #define vrshrn_n_s32(a, b) \ __extension__ \ ({ \ int32x4_t a_ = (a); \ int16x4_t result; \ __asm__ ("rshrn %0.4h,%1.4s,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) #define vrshrn_n_s64(a, b) \ __extension__ \ ({ \ int64x2_t a_ = (a); \ int32x2_t result; \ __asm__ ("rshrn %0.2s,%1.2d,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) #define vrshrn_n_u16(a, b) \ __extension__ \ ({ \ uint16x8_t a_ = (a); \ uint8x8_t result; \ __asm__ ("rshrn %0.8b,%1.8h,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) #define vrshrn_n_u32(a, b) \ __extension__ \ ({ \ uint32x4_t a_ = (a); \ uint16x4_t result; \ __asm__ ("rshrn %0.4h,%1.4s,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) 
\ : /* No clobbers */); \ result; \ }) #define vrshrn_n_u64(a, b) \ __extension__ \ ({ \ uint64x2_t a_ = (a); \ uint32x2_t result; \ __asm__ ("rshrn %0.2s,%1.2d,%2" \ : "=w"(result) \ : "w"(a_), "i"(b) \ : /* No clobbers */); \ result; \ }) __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsqrte_u32 (uint32x2_t a) { uint32x2_t result; __asm__ ("ursqrte %0.2s,%1.2s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsqrteq_u32 (uint32x4_t a) { uint32x4_t result; __asm__ ("ursqrte %0.4s,%1.4s" : "=w"(result) : "w"(a) : /* No clobbers */); return result; } #define vshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ (a_, vcreate_s8 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vshrn_high_n_s32(a, b, c) \ __extension__ \ ({ \ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ (a_, vcreate_s16 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vshrn_high_n_s64(a, b, c) \ __extension__ \ ({ \ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ (a_, vcreate_s32 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vshrn_high_n_u16(a, b, c) \ __extension__ \ ({ \ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ (a_, vcreate_u8 \ (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vshrn_high_n_u32(a, b, c) \ __extension__ \ ({ \ uint32x4_t b_ = (b); \ 
uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vshrn_high_n_u64 (a, b, c): RESULT is first built as A in the low
   half combined with a zero high half, then SHRN2 narrows B, shifted
   right by C, into the high half.  RESULT is a read-write ("+w")
   operand.  A and B are each evaluated once (B first); C is passed
   with the "i" constraint and must be an integer constant
   expression.  */
#define vshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vshrn_n_<t> (a, b): SHRN of the 128-bit vector A by the constant B,
   producing a 64-bit vector with lanes half as wide.  A is evaluated
   exactly once; B is passed with the "i" constraint and must be an
   integer constant expression.  The signed and unsigned variants use
   the same instruction.  */
#define vshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

/* vsli_n_<t> / vsliq_n_<t> (a, b, c): SLI (shift left and insert) of
   B by the constant C into A.  The instruction reads and writes the
   same register, so A is tied to the output with the matching
   constraint "0"; this is why the template names only %0, %2 and %3.
   A and B are each evaluated once (B first); C is passed with the "i"
   constraint and must be an integer constant expression.  */
#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* vsri_n_<t> (a, b, c): SRI (shift right and insert) of B by the
   constant C into A, with the same operand tying and evaluation
   rules as the vsli macros.  The p64 variant operates on the scalar
   D-register form (%d0, %d2).  */
#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p64(a, b, c) \
  __extension__ \
    ({ \
       poly64x1_t b_ = (b); \
       poly64x1_t a_ = (a); \
       poly64x1_t result; \
       __asm__ ("sri %d0,%d2,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers.
*/); \ result; \ }) #define vsriq_n_p8(a, b, c) \ __extension__ \ ({ \ poly8x16_t b_ = (b); \ poly8x16_t a_ = (a); \ poly8x16_t result; \ __asm__ ("sri %0.16b,%2.16b,%3" \ : "=w"(result) \ : "0"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vsriq_n_p16(a, b, c) \ __extension__ \ ({ \ poly16x8_t b_ = (b); \ poly16x8_t a_ = (a); \ poly16x8_t result; \ __asm__ ("sri %0.8h,%2.8h,%3" \ : "=w"(result) \ : "0"(a_), "w"(b_), "i"(c) \ : /* No clobbers */); \ result; \ }) #define vsriq_n_p64(a, b, c) \ __extension__ \ ({ \ poly64x2_t b_ = (b); \ poly64x2_t a_ = (a); \ poly64x2_t result; \ __asm__ ("sri %0.2d,%2.2d,%3" \ : "=w"(result) \ : "0"(a_), "w"(b_), "i"(c) \ : /* No clobbers. */); \ result; \ }) __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p8 (poly8x8_t a, poly8x8_t b) { return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b)) != 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p16 (poly16x4_t a, poly16x4_t b) { return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b)) != 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p64 (poly64x1_t a, poly64x1_t b) { return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0)); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p8 (poly8x16_t a, poly8x16_t b) { return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b)) != 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p16 (poly16x8_t a, poly16x8_t b) { return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b)) != 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtstq_p64 (poly64x2_t a, poly64x2_t b) { return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) 
b)) != __AARCH64_INT64_C (0)); } /* End of temporary inline asm implementations. */ /* Start of temporary inline asm for vldn, vstn and friends. */ /* Create struct element types for duplicating loads. Create 2 element structures of: +------+----+----+----+----+ | | 8 | 16 | 32 | 64 | +------+----+----+----+----+ |int | Y | Y | N | N | +------+----+----+----+----+ |uint | Y | Y | N | N | +------+----+----+----+----+ |float | - | Y | N | N | +------+----+----+----+----+ |poly | Y | Y | - | - | +------+----+----+----+----+ Create 3 element structures of: +------+----+----+----+----+ | | 8 | 16 | 32 | 64 | +------+----+----+----+----+ |int | Y | Y | Y | Y | +------+----+----+----+----+ |uint | Y | Y | Y | Y | +------+----+----+----+----+ |float | - | Y | Y | Y | +------+----+----+----+----+ |poly | Y | Y | - | - | +------+----+----+----+----+ Create 4 element structures of: +------+----+----+----+----+ | | 8 | 16 | 32 | 64 | +------+----+----+----+----+ |int | Y | N | N | Y | +------+----+----+----+----+ |uint | Y | N | N | Y | +------+----+----+----+----+ |float | - | N | N | Y | +------+----+----+----+----+ |poly | Y | N | - | - | +------+----+----+----+----+ This is required for casting memory reference. */ #define __STRUCTN(t, sz, nelem) \ typedef struct t ## sz ## x ## nelem ## _t { \ t ## sz ## _t val[nelem]; \ } t ## sz ## x ## nelem ## _t; /* 2-element structs. */ __STRUCTN (int, 8, 2) __STRUCTN (int, 16, 2) __STRUCTN (uint, 8, 2) __STRUCTN (uint, 16, 2) __STRUCTN (float, 16, 2) __STRUCTN (poly, 8, 2) __STRUCTN (poly, 16, 2) /* 3-element structs. */ __STRUCTN (int, 8, 3) __STRUCTN (int, 16, 3) __STRUCTN (int, 32, 3) __STRUCTN (int, 64, 3) __STRUCTN (uint, 8, 3) __STRUCTN (uint, 16, 3) __STRUCTN (uint, 32, 3) __STRUCTN (uint, 64, 3) __STRUCTN (float, 16, 3) __STRUCTN (float, 32, 3) __STRUCTN (float, 64, 3) __STRUCTN (poly, 8, 3) __STRUCTN (poly, 16, 3) /* 4-element structs. 
*/ __STRUCTN (int, 8, 4) __STRUCTN (int, 64, 4) __STRUCTN (uint, 8, 4) __STRUCTN (uint, 64, 4) __STRUCTN (poly, 8, 4) __STRUCTN (float, 64, 4) /* The helper macro is removed once all struct types have been instantiated. */ #undef __STRUCTN /* __ST2_LANE_FUNC (first form) defines vst2_lane_<funcsuffix>. Each of __b.val[0] and __b.val[1] is combined with a zero vector obtained from vcreate into a vector of the wider largetype element type; both wide vectors are inserted into an __builtin_aarch64_simd_oi tuple at positions 0 and 1, and the tuple is passed to __builtin_aarch64_st2_lane<mode> together with __ptr (cast to the builtin pointer type) and the lane number __c. */ #define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ qmode, ptr_mode, funcsuffix, signedtype) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ __builtin_aarch64_simd_oi __o; \ largetype __temp; \ __temp.val[0] \ = vcombine_##funcsuffix (__b.val[0], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[1] \ = vcombine_##funcsuffix (__b.val[1], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ (signedtype) __temp.val[0], 0); \ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ (signedtype) __temp.val[1], 1); \ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __o, __c); \ } /* Instantiate vst2_lane_<funcsuffix> for each supported element type. */ __ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, float16x8_t) __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, float32x4_t) __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, float64x2_t) __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, int8x16_t) __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, int16x8_t) __ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, poly64x2_t) __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, int8x16_t) __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, int16x8_t) __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, int32x4_t) __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, int64x2_t) __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, int8x16_t) __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, 
hi, u16, int16x8_t) __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, int32x4_t) __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, int64x2_t) /* The macro name is reused below with a different parameter list. */ #undef __ST2_LANE_FUNC /* __ST2_LANE_FUNC (second form) defines vst2q_lane_<funcsuffix>. The argument __b is reinterpreted as an __builtin_aarch64_simd_oi through a union and passed to __builtin_aarch64_st2_lane<mode> together with __ptr (cast to the builtin pointer type) and the lane number __c. */ #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ union { intype __i; \ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __temp.__o, __c); \ } /* Instantiate vst2q_lane_<funcsuffix> for each supported element type. */ __ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) __ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) /* __ST3_LANE_FUNC (first form) defines vst3_lane_<funcsuffix>. Each of the three vectors in __b is combined with a zero vector from vcreate, the wide vectors are inserted into an __builtin_aarch64_simd_ci tuple, and the tuple is passed to __builtin_aarch64_st3_lane<mode>. */ #define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ qmode, ptr_mode, funcsuffix, signedtype) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ __builtin_aarch64_simd_ci __o; \ largetype __temp; \ __temp.val[0] \ = vcombine_##funcsuffix (__b.val[0], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[1] \ = vcombine_##funcsuffix (__b.val[1], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[2] \ = 
vcombine_##funcsuffix (__b.val[2], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __o = __builtin_aarch64_set_qregci##qmode (__o, \ (signedtype) __temp.val[0], 0); \ __o = __builtin_aarch64_set_qregci##qmode (__o, \ (signedtype) __temp.val[1], 1); \ __o = __builtin_aarch64_set_qregci##qmode (__o, \ (signedtype) __temp.val[2], 2); \ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __o, __c); \ } /* Instantiate vst3_lane_<funcsuffix> for each supported element type. */ __ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, float16x8_t) __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, float32x4_t) __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, float64x2_t) __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, int8x16_t) __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, int16x8_t) __ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, poly64x2_t) __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, int8x16_t) __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, int16x8_t) __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, int32x4_t) __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, int64x2_t) __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, int8x16_t) __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, int16x8_t) __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, int32x4_t) __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, int64x2_t) /* The macro name is reused below with a different parameter list. */ #undef __ST3_LANE_FUNC /* __ST3_LANE_FUNC (second form) defines vst3q_lane_<funcsuffix>: __b is reinterpreted as an __builtin_aarch64_simd_ci through a union and passed to __builtin_aarch64_st3_lane<mode>. */ #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ union { intype __i; \ __builtin_aarch64_simd_ci __o; } __temp = { __b 
}; \ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __temp.__o, __c); \ } /* Instantiate vst3q_lane_<funcsuffix> for each supported element type. */ __ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) __ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) /* __ST4_LANE_FUNC (first form) defines vst4_lane_<funcsuffix>. Each of the four vectors in __b is combined with a zero vector from vcreate, the wide vectors are inserted into an __builtin_aarch64_simd_xi tuple at positions 0 to 3, and the tuple is passed to __builtin_aarch64_st4_lane<mode>. */ #define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ qmode, ptr_mode, funcsuffix, signedtype) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ __builtin_aarch64_simd_xi __o; \ largetype __temp; \ __temp.val[0] \ = vcombine_##funcsuffix (__b.val[0], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[1] \ = vcombine_##funcsuffix (__b.val[1], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[2] \ = vcombine_##funcsuffix (__b.val[2], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __temp.val[3] \ = vcombine_##funcsuffix (__b.val[3], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ (signedtype) __temp.val[0], 0); \ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ (signedtype) __temp.val[1], 1); \ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ (signedtype) __temp.val[2], 2); \ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ (signedtype) __temp.val[3], 3); \ 
__builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __o, __c); \ } /* Instantiate vst4_lane_<funcsuffix> for each supported element type. */ __ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, float16x8_t) __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, float32x4_t) __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, float64x2_t) __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, int8x16_t) __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, int16x8_t) __ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, poly64x2_t) __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, int8x16_t) __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, int16x8_t) __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, int32x4_t) __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, int64x2_t) __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, int8x16_t) __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, int16x8_t) __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, int32x4_t) __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, int64x2_t) /* The macro name is reused below with a different parameter list. */ #undef __ST4_LANE_FUNC /* __ST4_LANE_FUNC (second form) defines vst4q_lane_<funcsuffix>: __b is reinterpreted as an __builtin_aarch64_simd_xi through a union and passed to __builtin_aarch64_st4_lane<mode> together with __ptr (cast to the builtin pointer type) and the lane number __c. */ #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ __extension__ extern __inline void \ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ intype __b, const int __c) \ { \ union { intype __i; \ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ __ptr, __temp.__o, __c); \ } /* Instantiate vst4q_lane_<funcsuffix> for each supported element type. */ __ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) 
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) __ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) /* vaddlv_s32: emits saddlp from the .2s source a into a .1d destination and returns it as int64_t. */ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_s32 (int32x2_t a) { int64_t result; __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); return result; } /* vaddlv_u32: unsigned counterpart of vaddlv_s32, emitting uaddlp. */ __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_u32 (uint32x2_t a) { uint64_t result; __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); return result; } /* vqdmulh_laneq_* and vqdmulhq_laneq_*: forward all three arguments unchanged to the matching __builtin_aarch64_sqdmulh_laneq builtin. */ __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) { return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); } __extension__ extern __inline int16x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); } /* vqrdmulh_laneq_* and vqrdmulhq_laneq_*: forward all three arguments unchanged to the matching __builtin_aarch64_sqrdmulh_laneq builtin. */ __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); } /* Table intrinsics. */ /* vqtbl1_*: emit tbl with the single .16b table register a and the .8b index vector b; the result is write-only in the asm. */ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_p8 (poly8x16_t a, uint8x8_t b) { poly8x8_t result; __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_s8 (int8x16_t a, uint8x8_t b) { int8x8_t result; __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_u8 (uint8x16_t a, uint8x8_t b) { uint8x8_t result; __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } /* vqtbl1q_*: same as vqtbl1_* with .16b index and result operands. */ __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) { poly8x16_t result; __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" : "=w"(result) : "w"(a), 
"w"(b) : /* No clobbers */); return result; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_s8 (int8x16_t a, uint8x16_t b) { int8x16_t result; __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) { uint8x16_t result; __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" : "=w"(result) : "w"(a), "w"(b) : /* No clobbers */); return result; } /* vqtbx1_*: emit tbx; result is initialised from r and is a read-write operand of the asm, tab is the .16b table and idx the .8b index vector. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) { int8x8_t result = r; __asm__ ("tbx %0.8b,{%1.16b},%2.8b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) { uint8x8_t result = r; __asm__ ("tbx %0.8b,{%1.16b},%2.8b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) { poly8x8_t result = r; __asm__ ("tbx %0.8b,{%1.16b},%2.8b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } /* vqtbx1q_*: same as vqtbx1_* with .16b index and result operands. */ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) { int8x16_t result = r; __asm__ ("tbx %0.16b,{%1.16b},%2.16b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) { uint8x16_t result = r; __asm__ 
("tbx %0.16b,{%1.16b},%2.16b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) { poly8x16_t result = r; __asm__ ("tbx %0.16b,{%1.16b},%2.16b" : "+w"(result) : "w"(tab), "w"(idx) : /* No clobbers */); return result; } /* V7 legacy table intrinsics. */ /* vtbl1_*: the 8-byte table is combined with a zero vector from vcreate so that tbl can be given a single .16b table register. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_s8 (int8x8_t tab, int8x8_t idx) { int8x8_t result; int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) { uint8x8_t result; uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) { poly8x8_t result; poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } /* vtbl2_*: tab.val[0] and tab.val[1] are combined into one 16-byte vector that serves as the tbl table register. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) { int8x8_t result; int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) { 
uint8x8_t result; uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) { poly8x8_t result; poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } /* vtbl3_*: tab.val[0] and tab.val[1] form the first 16-byte vector, tab.val[2] combined with a zero vector forms the second; both are inserted into an __builtin_aarch64_simd_oi tuple that is passed to __builtin_aarch64_tbl3v8qi with idx. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) { int8x8_t result; int8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = __builtin_aarch64_tbl3v8qi (__o, idx); return result; } /* The unsigned and poly variants cast idx to int8x8_t for the builtin and cast its result back to their own return type. */ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) { uint8x8_t result; uint8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) { poly8x8_t result; poly8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 
(__AARCH64_UINT64_C (0x0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); return result; } /* vtbl4_*: the four 8-byte tables are combined pairwise into two 16-byte vectors, inserted into an __builtin_aarch64_simd_oi tuple and looked up through __builtin_aarch64_tbl3v8qi, the same builtin used by vtbl3_*. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) { int8x8_t result; int8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = __builtin_aarch64_tbl3v8qi (__o, idx); return result; } /* The unsigned and poly variants cast idx to int8x8_t for the builtin and cast its result back to their own return type. */ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) { uint8x8_t result; uint8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) { poly8x8_t result; poly8x16x2_t temp; __builtin_aarch64_simd_oi __o; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); return result; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) { int8x8_t result = r; int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" : "+w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } /* vtbx2_*: tab.val[0] and tab.val[1] are combined into one 16-byte table; tbx is emitted with result initialised from r as a read-write operand. */ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) { uint8x8_t result = r; uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" : "+w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) { poly8x8_t result = r; poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" : "+w"(result) : "w"(temp), "w"(idx) : /* No clobbers */); return result; } /* End of temporary inline asm. */ /* Start of optimal implementations in approved order. */ /* vabd. 
*/ /* Floating-point absolute difference. The scalar and multi-lane forms call the fabd builtins directly; vabd_f64 applies vabdd_f64 to lane 0 of each argument and wraps the scalar in a one-element vector. */ __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabds_f32 (float32_t __a, float32_t __b) { return __builtin_aarch64_fabdsf (__a, __b); } __extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdd_f64 (float64_t __a, float64_t __b) { return __builtin_aarch64_fabddf (__a, __b); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_f32 (float32x2_t __a, float32x2_t __b) { return __builtin_aarch64_fabdv2sf (__a, __b); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabd_f64 (float64x1_t __a, float64x1_t __b) { return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0), vget_lane_f64 (__b, 0))}; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_f32 (float32x4_t __a, float32x4_t __b) { return __builtin_aarch64_fabdv4sf (__a, __b); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdq_f64 (float64x2_t __a, float64x2_t __b) { return __builtin_aarch64_fabdv2df (__a, __b); } /* vabs */ /* Absolute value through the abs builtins; vabs_f64 instead calls __builtin_fabs on element 0 and wraps the scalar in a one-element vector. */ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_f32 (float32x2_t __a) { return __builtin_aarch64_absv2sf (__a); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_f64 (float64x1_t __a) { return (float64x1_t) {__builtin_fabs (__a[0])}; } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_s8 (int8x8_t __a) { return __builtin_aarch64_absv8qi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_s16 (int16x4_t __a) { return __builtin_aarch64_absv4hi (__a); } __extension__ 
extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_s32 (int32x2_t __a) { return __builtin_aarch64_absv2si (__a); } /* vabs_s64: applies __builtin_aarch64_absdi to element 0 and wraps the scalar in a one-element vector. */ __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabs_s64 (int64x1_t __a) { return (int64x1_t) {__builtin_aarch64_absdi (__a[0])}; } /* vabsq_*: 128-bit forms, each forwarding to the abs builtin of the matching mode. */ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_f32 (float32x4_t __a) { return __builtin_aarch64_absv4sf (__a); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_f64 (float64x2_t __a) { return __builtin_aarch64_absv2df (__a); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_s8 (int8x16_t __a) { return __builtin_aarch64_absv16qi (__a); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_s16 (int16x8_t __a) { return __builtin_aarch64_absv8hi (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_s32 (int32x4_t __a) { return __builtin_aarch64_absv4si (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsq_s64 (int64x2_t __a) { return __builtin_aarch64_absv2di (__a); } /* Try to avoid moving between integer and vector registers. For why the cast to unsigned is needed check the vnegd_s64 intrinsic. There is a testcase related to this issue: gcc.target/aarch64/vabsd_s64.c. */ /* vabsd_s64 is written in plain C: a negative argument is negated after conversion to uint64_t, any other argument is returned unchanged. */ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsd_s64 (int64_t __a) { return __a < 0 ? 
- (uint64_t) __a : __a; } /* vadd */ /* Scalar 64-bit additions written in plain C. */ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddd_s64 (int64_t __a, int64_t __b) { return __a + __b; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddd_u64 (uint64_t __a, uint64_t __b) { return __a + __b; } /* vaddv */ /* Across-lane add reductions built on the reduc_plus_scal builtins. The signed variants pass the argument straight through; the unsigned variants cast the argument to the signed vector type before calling the builtin and cast the scalar result to their own unsigned return type. */ __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_s8 (int8x8_t __a) { return __builtin_aarch64_reduc_plus_scal_v8qi (__a); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_s16 (int16x4_t __a) { return __builtin_aarch64_reduc_plus_scal_v4hi (__a); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_s32 (int32x2_t __a) { return __builtin_aarch64_reduc_plus_scal_v2si (__a); } __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_u8 (uint8x8_t __a) { return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_u16 (uint16x4_t __a) { return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); } /* The result cast matches the declared uint32_t return type, as in the other unsigned reductions. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_u32 (uint32x2_t __a) { return (uint32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); } /* vaddvq_*: 128-bit forms of the same reductions. */ __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_s8 (int8x16_t __a) { return __builtin_aarch64_reduc_plus_scal_v16qi (__a); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_s16 (int16x8_t __a) { return __builtin_aarch64_reduc_plus_scal_v8hi (__a); } __extension__ extern __inline int32_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_s32 (int32x4_t __a) { return __builtin_aarch64_reduc_plus_scal_v4si (__a); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_s64 (int64x2_t __a) { return __builtin_aarch64_reduc_plus_scal_v2di (__a); } /* The unsigned vaddvq_* variants cast the argument to the signed vector type before calling the builtin and cast the scalar result to their unsigned return type. */ __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u8 (uint8x16_t __a) { return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u16 (uint16x8_t __a) { return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u32 (uint32x4_t __a) { return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u64 (uint64x2_t __a) { return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); } /* Floating-point reductions forward directly to the reduc_plus_scal builtin of the matching mode. */ __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_f32 (float32x2_t __a) { return __builtin_aarch64_reduc_plus_scal_v2sf (__a); } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_f32 (float32x4_t __a) { return __builtin_aarch64_reduc_plus_scal_v4sf (__a); } __extension__ extern __inline float64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_f64 (float64x2_t __a) { return __builtin_aarch64_reduc_plus_scal_v2df (__a); } /* vbsl */ /* Each vbsl variant forwards its three arguments to the __builtin_aarch64_simd_bsl builtin whose mode and signedness suffix match the argument types. */ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) { return 
__builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) { return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); } /* vbsl_f64: the one-element vectors are unpacked with [0], passed to the scalar df builtin, and the result is wrapped in a one-element vector. */ __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c) { return (float64x1_t) { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) }; } /* Polynomial variants use the _pupp builtins. */ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); } /* vbsl_p64: operates on element 0 through the scalar di builtin and wraps the result in a one-element vector. */ __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) { return (poly64x1_t) {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])}; } /* Signed integer variants use the _suss builtins. */ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) { return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) { return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) { return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); } __extension__ extern __inline int64x1_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) { return (int64x1_t) {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { return (uint64x1_t) {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])}; } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) { return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) { return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) { return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_p8 (uint8x16_t __a, poly8x16_t __b, 
poly8x16_t __c) { return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) { return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) { return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) { return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) { return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) { return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); } /* ARMv8.1-A instrinsics. */ #pragma GCC push_options #pragma GCC target ("+nothing+rdma") __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) { return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) { return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) { return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_s16 (int16x8_t __a, 
int16x8_t __b, int16x8_t __c) { return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) { return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_laneq_s16 
(int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) { return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) { return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) { return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) { return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) { return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); } #pragma GCC pop_options #pragma GCC push_options #pragma GCC target ("+nothing+crypto") /* vaes */ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaeseq_u8 (uint8x16_t data, uint8x16_t key) { return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaesdq_u8 (uint8x16_t data, uint8x16_t key) { return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vaesmcq_u8 (uint8x16_t data) { return __builtin_aarch64_crypto_aesmcv16qi_uu (data); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaesimcq_u8 (uint8x16_t data) { return __builtin_aarch64_crypto_aesimcv16qi_uu (data); } #pragma GCC pop_options /* vcage */ __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcage_f64 (float64x1_t __a, float64x1_t __b) { return vabs_f64 (__a) >= vabs_f64 (__b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcages_f32 (float32_t __a, float32_t __b) { return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcage_f32 (float32x2_t __a, float32x2_t __b) { return vabs_f32 (__a) >= vabs_f32 (__b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcageq_f32 (float32x4_t __a, float32x4_t __b) { return vabsq_f32 (__a) >= vabsq_f32 (__b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaged_f64 (float64_t __a, float64_t __b) { return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcageq_f64 (float64x2_t __a, float64x2_t __b) { return vabsq_f64 (__a) >= vabsq_f64 (__b); } /* vcagt */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagts_f32 (float32_t __a, float32_t __b) { return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? 
-1 : 0; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagt_f32 (float32x2_t __a, float32x2_t __b) { return vabs_f32 (__a) > vabs_f32 (__b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagt_f64 (float64x1_t __a, float64x1_t __b) { return vabs_f64 (__a) > vabs_f64 (__b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagtq_f32 (float32x4_t __a, float32x4_t __b) { return vabsq_f32 (__a) > vabsq_f32 (__b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagtd_f64 (float64_t __a, float64_t __b) { return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagtq_f64 (float64x2_t __a, float64x2_t __b) { return vabsq_f64 (__a) > vabsq_f64 (__b); } /* vcale */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcale_f32 (float32x2_t __a, float32x2_t __b) { return vabs_f32 (__a) <= vabs_f32 (__b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcale_f64 (float64x1_t __a, float64x1_t __b) { return vabs_f64 (__a) <= vabs_f64 (__b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaled_f64 (float64_t __a, float64_t __b) { return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0; } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcales_f32 (float32_t __a, float32_t __b) { return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? 
-1 : 0; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaleq_f32 (float32x4_t __a, float32x4_t __b) { return vabsq_f32 (__a) <= vabsq_f32 (__b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaleq_f64 (float64x2_t __a, float64x2_t __b) { return vabsq_f64 (__a) <= vabsq_f64 (__b); } /* vcalt */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcalt_f32 (float32x2_t __a, float32x2_t __b) { return vabs_f32 (__a) < vabs_f32 (__b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcalt_f64 (float64x1_t __a, float64x1_t __b) { return vabs_f64 (__a) < vabs_f64 (__b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaltd_f64 (float64_t __a, float64_t __b) { return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaltq_f32 (float32x4_t __a, float32x4_t __b) { return vabsq_f32 (__a) < vabsq_f32 (__b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaltq_f64 (float64x2_t __a, float64x2_t __b) { return vabsq_f64 (__a) < vabsq_f64 (__b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcalts_f32 (float32_t __a, float32_t __b) { return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0; } /* vceq - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_f32 (float32x2_t __a, float32x2_t __b) { return (uint32x2_t) (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_f64 (float64x1_t __a, float64x1_t __b) { return (uint64x1_t) (__a == __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_p8 (poly8x8_t __a, poly8x8_t __b) { return (uint8x8_t) (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_p64 (poly64x1_t __a, poly64x1_t __b) { return (uint64x1_t) (__a == __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) (__a == __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_s16 (int16x4_t __a, int16x4_t __b) { return (uint16x4_t) (__a == __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_s32 (int32x2_t __a, int32x2_t __b) { return (uint32x2_t) (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_s64 (int64x1_t __a, int64x1_t __b) { return (uint64x1_t) (__a == __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_u8 (uint8x8_t __a, uint8x8_t __b) { return (__a == __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_u16 (uint16x4_t __a, uint16x4_t __b) { return (__a == __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_u32 (uint32x2_t __a, uint32x2_t __b) 
{ return (__a == __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceq_u64 (uint64x1_t __a, uint64x1_t __b) { return (__a == __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_f32 (float32x4_t __a, float32x4_t __b) { return (uint32x4_t) (__a == __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_f64 (float64x2_t __a, float64x2_t __b) { return (uint64x2_t) (__a == __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_p8 (poly8x16_t __a, poly8x16_t __b) { return (uint8x16_t) (__a == __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) (__a == __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_s16 (int16x8_t __a, int16x8_t __b) { return (uint16x8_t) (__a == __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_s32 (int32x4_t __a, int32x4_t __b) { return (uint32x4_t) (__a == __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_s64 (int64x2_t __a, int64x2_t __b) { return (uint64x2_t) (__a == __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_u8 (uint8x16_t __a, uint8x16_t __b) { return (__a == __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_u16 (uint16x8_t __a, uint16x8_t __b) { return (__a == __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_u32 
(uint32x4_t __a, uint32x4_t __b) { return (__a == __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqq_u64 (uint64x2_t __a, uint64x2_t __b) { return (__a == __b); } /* vceq - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqs_f32 (float32_t __a, float32_t __b) { return __a == __b ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqd_s64 (int64_t __a, int64_t __b) { return __a == __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqd_u64 (uint64_t __a, uint64_t __b) { return __a == __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqd_f64 (float64_t __a, float64_t __b) { return __a == __b ? -1ll : 0ll; } /* vceqz - vector. */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_f32 (float32x2_t __a) { return (uint32x2_t) (__a == 0.0f); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_f64 (float64x1_t __a) { return (uint64x1_t) (__a == (float64x1_t) {0.0}); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_p8 (poly8x8_t __a) { return (uint8x8_t) (__a == 0); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_s8 (int8x8_t __a) { return (uint8x8_t) (__a == 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_s16 (int16x4_t __a) { return (uint16x4_t) (__a == 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_s32 
(int32x2_t __a) { return (uint32x2_t) (__a == 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_s64 (int64x1_t __a) { return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_u8 (uint8x8_t __a) { return (__a == 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_u16 (uint16x4_t __a) { return (__a == 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_u32 (uint32x2_t __a) { return (__a == 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqz_u64 (uint64x1_t __a) { return (__a == __AARCH64_UINT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_f32 (float32x4_t __a) { return (uint32x4_t) (__a == 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_f64 (float64x2_t __a) { return (uint64x2_t) (__a == 0.0f); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_p8 (poly8x16_t __a) { return (uint8x16_t) (__a == 0); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_s8 (int8x16_t __a) { return (uint8x16_t) (__a == 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_s16 (int16x8_t __a) { return (uint16x8_t) (__a == 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_s32 (int32x4_t __a) { return (uint32x4_t) (__a == 0); } __extension__ extern __inline uint64x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_s64 (int64x2_t __a) { return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_u8 (uint8x16_t __a) { return (__a == 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_u16 (uint16x8_t __a) { return (__a == 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_u32 (uint32x4_t __a) { return (__a == 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_u64 (uint64x2_t __a) { return (__a == __AARCH64_UINT64_C (0)); } /* vceqz - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzs_f32 (float32_t __a) { return __a == 0.0f ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzd_s64 (int64_t __a) { return __a == 0 ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzd_u64 (uint64_t __a) { return __a == 0 ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzd_f64 (float64_t __a) { return __a == 0.0 ? -1ll : 0ll; } /* vcge - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_f32 (float32x2_t __a, float32x2_t __b) { return (uint32x2_t) (__a >= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_f64 (float64x1_t __a, float64x1_t __b) { return (uint64x1_t) (__a >= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) (__a >= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_s16 (int16x4_t __a, int16x4_t __b) { return (uint16x4_t) (__a >= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_s32 (int32x2_t __a, int32x2_t __b) { return (uint32x2_t) (__a >= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_s64 (int64x1_t __a, int64x1_t __b) { return (uint64x1_t) (__a >= __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_u8 (uint8x8_t __a, uint8x8_t __b) { return (__a >= __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_u16 (uint16x4_t __a, uint16x4_t __b) { return (__a >= __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_u32 (uint32x2_t __a, uint32x2_t __b) { return (__a >= __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcge_u64 (uint64x1_t __a, uint64x1_t __b) { return (__a >= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_f32 (float32x4_t __a, float32x4_t __b) { return 
(uint32x4_t) (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_f64 (float64x2_t __a, float64x2_t __b) { return (uint64x2_t) (__a >= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) (__a >= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_s16 (int16x8_t __a, int16x8_t __b) { return (uint16x8_t) (__a >= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_s32 (int32x4_t __a, int32x4_t __b) { return (uint32x4_t) (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_s64 (int64x2_t __a, int64x2_t __b) { return (uint64x2_t) (__a >= __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) { return (__a >= __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) { return (__a >= __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) { return (__a >= __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) { return (__a >= __b); } /* vcge - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcges_f32 (float32_t __a, float32_t __b) { return __a >= __b ? 
-1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcged_s64 (int64_t __a, int64_t __b) { return __a >= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcged_u64 (uint64_t __a, uint64_t __b) { return __a >= __b ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcged_f64 (float64_t __a, float64_t __b) { return __a >= __b ? -1ll : 0ll; } /* vcgez - vector. */ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_f32 (float32x2_t __a) { return (uint32x2_t) (__a >= 0.0f); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_f64 (float64x1_t __a) { return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0}); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_s8 (int8x8_t __a) { return (uint8x8_t) (__a >= 0); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_s16 (int16x4_t __a) { return (uint16x4_t) (__a >= 0); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_s32 (int32x2_t __a) { return (uint32x2_t) (__a >= 0); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgez_s64 (int64x1_t __a) { return (uint64x1_t) (__a >= __AARCH64_INT64_C (0)); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_f32 (float32x4_t __a) { return (uint32x4_t) (__a >= 0.0f); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_f64 (float64x2_t __a) { return (uint64x2_t) (__a >= 0.0); 
} __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_s8 (int8x16_t __a) { return (uint8x16_t) (__a >= 0); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_s16 (int16x8_t __a) { return (uint16x8_t) (__a >= 0); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_s32 (int32x4_t __a) { return (uint32x4_t) (__a >= 0); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezq_s64 (int64x2_t __a) { return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); } /* vcgez - scalar. */ __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezs_f32 (float32_t __a) { return __a >= 0.0f ? -1 : 0; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezd_s64 (int64_t __a) { return __a >= 0 ? -1ll : 0ll; } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezd_f64 (float64_t __a) { return __a >= 0.0 ? -1ll : 0ll; } /* vcgt - vector. 
*/ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_f32 (float32x2_t __a, float32x2_t __b) { return (uint32x2_t) (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_f64 (float64x1_t __a, float64x1_t __b) { return (uint64x1_t) (__a > __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) (__a > __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_s16 (int16x4_t __a, int16x4_t __b) { return (uint16x4_t) (__a > __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_s32 (int32x2_t __a, int32x2_t __b) { return (uint32x2_t) (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_s64 (int64x1_t __a, int64x1_t __b) { return (uint64x1_t) (__a > __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_u8 (uint8x8_t __a, uint8x8_t __b) { return (__a > __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_u16 (uint16x4_t __a, uint16x4_t __b) { return (__a > __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_u32 (uint32x2_t __a, uint32x2_t __b) { return (__a > __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgt_u64 (uint64x1_t __a, uint64x1_t __b) { return (__a > __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_f32 (float32x4_t __a, float32x4_t __b) { return (uint32x4_t) (__a 
> __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_f64 (float64x2_t __a, float64x2_t __b) { return (uint64x2_t) (__a > __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) (__a > __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_s16 (int16x8_t __a, int16x8_t __b) { return (uint16x8_t) (__a > __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_s32 (int32x4_t __a, int32x4_t __b) { return (uint32x4_t) (__a > __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_s64 (int64x2_t __a, int64x2_t __b) { return (uint64x2_t) (__a > __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) { return (__a > __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) { return (__a > __b); }