Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/arm_neon.h @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* ARM NEON intrinsics include file. | 1 /* ARM NEON intrinsics include file. |
2 | 2 |
3 Copyright (C) 2006-2018 Free Software Foundation, Inc. | 3 Copyright (C) 2006-2020 Free Software Foundation, Inc. |
4 Contributed by CodeSourcery. | 4 Contributed by CodeSourcery. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
8 GCC is free software; you can redistribute it and/or modify it | 8 GCC is free software; you can redistribute it and/or modify it |
37 #ifdef __cplusplus | 37 #ifdef __cplusplus |
38 extern "C" { | 38 extern "C" { |
39 #endif | 39 #endif |
40 | 40 |
41 #include <arm_fp16.h> | 41 #include <arm_fp16.h> |
42 #include <arm_bf16.h> | |
42 #include <stdint.h> | 43 #include <stdint.h> |
43 | 44 |
44 typedef __simd64_int8_t int8x8_t; | 45 typedef __simd64_int8_t int8x8_t; |
45 typedef __simd64_int16_t int16x4_t; | 46 typedef __simd64_int16_t int16x4_t; |
46 typedef __simd64_int32_t int32x2_t; | 47 typedef __simd64_int32_t int32x2_t; |
81 typedef __simd128_uint32_t uint32x4_t; | 82 typedef __simd128_uint32_t uint32x4_t; |
82 typedef __simd128_uint64_t uint64x2_t; | 83 typedef __simd128_uint64_t uint64x2_t; |
83 | 84 |
84 typedef float float32_t; | 85 typedef float float32_t; |
85 | 86 |
87 typedef __simd128_bfloat16_t bfloat16x8_t; | |
88 typedef __simd64_bfloat16_t bfloat16x4_t; | |
89 | |
86 /* The Poly types are user visible and live in their own world, | 90 /* The Poly types are user visible and live in their own world, |
87 keep them that way. */ | 91 keep them that way. */ |
88 typedef __builtin_neon_poly8 poly8_t; | 92 typedef __builtin_neon_poly8 poly8_t; |
89 typedef __builtin_neon_poly16 poly16_t; | 93 typedef __builtin_neon_poly16 poly16_t; |
90 #pragma GCC push_options | 94 #pragma GCC push_options |
16936 | 16940 |
16937 __extension__ extern __inline uint32_t | 16941 __extension__ extern __inline uint32_t |
16938 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 16942 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
16939 vsha1h_u32 (uint32_t __hash_e) | 16943 vsha1h_u32 (uint32_t __hash_e) |
16940 { | 16944 { |
16941 uint32x4_t __t = vdupq_n_u32 (0); | 16945 return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (vdupq_n_u32 (__hash_e)), |
16942 __t = vsetq_lane_u32 (__hash_e, __t, 0); | 16946 0); |
16943 __t = __builtin_arm_crypto_sha1h (__t); | |
16944 return vgetq_lane_u32 (__t, 0); | |
16945 } | 16947 } |
16946 | 16948 |
16947 __extension__ extern __inline uint32x4_t | 16949 __extension__ extern __inline uint32x4_t |
16948 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 16950 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
16949 vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) | 16951 vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) |
16950 { | 16952 { |
16951 uint32x4_t __t = vdupq_n_u32 (0); | 16953 return __builtin_arm_crypto_sha1c (__hash_abcd, vdupq_n_u32 (__hash_e), |
16952 __t = vsetq_lane_u32 (__hash_e, __t, 0); | 16954 __wk); |
16953 return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); | |
16954 } | 16955 } |
16955 | 16956 |
16956 __extension__ extern __inline uint32x4_t | 16957 __extension__ extern __inline uint32x4_t |
16957 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 16958 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
16958 vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) | 16959 vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) |
16959 { | 16960 { |
16960 uint32x4_t __t = vdupq_n_u32 (0); | 16961 return __builtin_arm_crypto_sha1p (__hash_abcd, vdupq_n_u32 (__hash_e), |
16961 __t = vsetq_lane_u32 (__hash_e, __t, 0); | 16962 __wk); |
16962 return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); | |
16963 } | 16963 } |
16964 | 16964 |
16965 __extension__ extern __inline uint32x4_t | 16965 __extension__ extern __inline uint32x4_t |
16966 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 16966 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
16967 vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) | 16967 vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) |
16968 { | 16968 { |
16969 uint32x4_t __t = vdupq_n_u32 (0); | 16969 return __builtin_arm_crypto_sha1m (__hash_abcd, vdupq_n_u32 (__hash_e), |
16970 __t = vsetq_lane_u32 (__hash_e, __t, 0); | 16970 __wk); |
16971 return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); | |
16972 } | 16971 } |
16973 | 16972 |
16974 __extension__ extern __inline uint32x4_t | 16973 __extension__ extern __inline uint32x4_t |
16975 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 16974 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
16976 vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) | 16975 vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) |
18104 #pragma GCC push_options | 18103 #pragma GCC push_options |
18105 #pragma GCC target ("arch=armv8.2-a+fp16fml") | 18104 #pragma GCC target ("arch=armv8.2-a+fp16fml") |
18106 | 18105 |
18107 __extension__ extern __inline float32x2_t | 18106 __extension__ extern __inline float32x2_t |
18108 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18107 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18109 vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) | 18108 vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) |
18110 { | 18109 { |
18111 return __builtin_neon_vfmal_lowv2sf (__r, __a, __b); | 18110 return __builtin_neon_vfmal_lowv2sf (__r, __a, __b); |
18112 } | 18111 } |
18113 | 18112 |
18114 __extension__ extern __inline float32x2_t | 18113 __extension__ extern __inline float32x2_t |
18115 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18114 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18116 vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) | 18115 vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) |
18117 { | 18116 { |
18118 return __builtin_neon_vfmsl_lowv2sf (__r, __a, __b); | 18117 return __builtin_neon_vfmsl_lowv2sf (__r, __a, __b); |
18119 } | 18118 } |
18120 | 18119 |
18121 __extension__ extern __inline float32x2_t | 18120 __extension__ extern __inline float32x2_t |
18122 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18121 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18123 vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) | 18122 vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) |
18124 { | 18123 { |
18125 return __builtin_neon_vfmal_highv2sf (__r, __a, __b); | 18124 return __builtin_neon_vfmal_highv2sf (__r, __a, __b); |
18126 } | 18125 } |
18127 | 18126 |
18128 __extension__ extern __inline float32x2_t | 18127 __extension__ extern __inline float32x2_t |
18129 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18128 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18130 vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) | 18129 vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) |
18131 { | 18130 { |
18132 return __builtin_neon_vfmsl_highv2sf (__r, __a, __b); | 18131 return __builtin_neon_vfmsl_highv2sf (__r, __a, __b); |
18133 } | 18132 } |
18134 | 18133 |
18135 __extension__ extern __inline float32x4_t | 18134 __extension__ extern __inline float32x4_t |
18136 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18135 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18137 vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) | 18136 vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) |
18138 { | 18137 { |
18139 return __builtin_neon_vfmal_lowv4sf (__r, __a, __b); | 18138 return __builtin_neon_vfmal_lowv4sf (__r, __a, __b); |
18140 } | 18139 } |
18141 | 18140 |
18142 __extension__ extern __inline float32x4_t | 18141 __extension__ extern __inline float32x4_t |
18143 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18142 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18144 vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) | 18143 vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) |
18145 { | 18144 { |
18146 return __builtin_neon_vfmsl_lowv4sf (__r, __a, __b); | 18145 return __builtin_neon_vfmsl_lowv4sf (__r, __a, __b); |
18147 } | 18146 } |
18148 | 18147 |
18149 __extension__ extern __inline float32x4_t | 18148 __extension__ extern __inline float32x4_t |
18150 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18149 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18151 vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) | 18150 vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) |
18152 { | 18151 { |
18153 return __builtin_neon_vfmal_highv4sf (__r, __a, __b); | 18152 return __builtin_neon_vfmal_highv4sf (__r, __a, __b); |
18154 } | 18153 } |
18155 | 18154 |
18156 __extension__ extern __inline float32x4_t | 18155 __extension__ extern __inline float32x4_t |
18157 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18156 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18158 vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) | 18157 vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) |
18159 { | 18158 { |
18160 return __builtin_neon_vfmsl_highv4sf (__r, __a, __b); | 18159 return __builtin_neon_vfmsl_highv4sf (__r, __a, __b); |
18161 } | 18160 } |
18162 | 18161 |
18163 __extension__ extern __inline float32x2_t | 18162 __extension__ extern __inline float32x2_t |
18164 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18163 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18165 vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, | 18164 vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, |
18166 const int __index) | 18165 const int __index) |
18167 { | 18166 { |
18168 __builtin_arm_lane_check (4, __index); | 18167 __builtin_arm_lane_check (4, __index); |
18169 return __builtin_neon_vfmal_lane_lowv2sf (__r, __a, __b, __index); | 18168 return __builtin_neon_vfmal_lane_lowv2sf (__r, __a, __b, __index); |
18170 } | 18169 } |
18171 | 18170 |
18172 __extension__ extern __inline float32x2_t | 18171 __extension__ extern __inline float32x2_t |
18173 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18172 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18174 vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, | 18173 vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, |
18175 const int __index) | 18174 const int __index) |
18176 { | 18175 { |
18177 __builtin_arm_lane_check (4, __index); | 18176 __builtin_arm_lane_check (4, __index); |
18178 return __builtin_neon_vfmal_lane_highv2sf (__r, __a, __b, __index); | 18177 return __builtin_neon_vfmal_lane_highv2sf (__r, __a, __b, __index); |
18179 } | 18178 } |
18180 | 18179 |
18181 __extension__ extern __inline float32x4_t | 18180 __extension__ extern __inline float32x4_t |
18182 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18181 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18183 vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, | 18182 vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, |
18184 const int __index) | 18183 const int __index) |
18185 { | 18184 { |
18186 __builtin_arm_lane_check (8, __index); | 18185 __builtin_arm_lane_check (8, __index); |
18187 return __builtin_neon_vfmal_lane_lowv4sf (__r, __a, __b, __index); | 18186 return __builtin_neon_vfmal_lane_lowv4sf (__r, __a, __b, __index); |
18188 } | 18187 } |
18189 | 18188 |
18190 __extension__ extern __inline float32x4_t | 18189 __extension__ extern __inline float32x4_t |
18191 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18190 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18192 vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, | 18191 vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, |
18193 const int __index) | 18192 const int __index) |
18194 { | 18193 { |
18195 __builtin_arm_lane_check (4, __index); | 18194 __builtin_arm_lane_check (4, __index); |
18196 return __builtin_neon_vfmal_lane_lowv4hfv4sf (__r, __a, __b, __index); | 18195 return __builtin_neon_vfmal_lane_lowv4hfv4sf (__r, __a, __b, __index); |
18197 } | 18196 } |
18198 | 18197 |
18199 __extension__ extern __inline float32x2_t | 18198 __extension__ extern __inline float32x2_t |
18200 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18199 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18201 vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, | 18200 vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, |
18202 const int __index) | 18201 const int __index) |
18203 { | 18202 { |
18204 __builtin_arm_lane_check (8, __index); | 18203 __builtin_arm_lane_check (8, __index); |
18205 return __builtin_neon_vfmal_lane_lowv8hfv2sf (__r, __a, __b, __index); | 18204 return __builtin_neon_vfmal_lane_lowv8hfv2sf (__r, __a, __b, __index); |
18206 } | 18205 } |
18207 | 18206 |
18208 __extension__ extern __inline float32x4_t | 18207 __extension__ extern __inline float32x4_t |
18209 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18208 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18210 vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, | 18209 vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, |
18211 const int __index) | 18210 const int __index) |
18212 { | 18211 { |
18213 __builtin_arm_lane_check (8, __index); | 18212 __builtin_arm_lane_check (8, __index); |
18214 return __builtin_neon_vfmal_lane_highv4sf (__r, __a, __b, __index); | 18213 return __builtin_neon_vfmal_lane_highv4sf (__r, __a, __b, __index); |
18215 } | 18214 } |
18216 | 18215 |
18217 __extension__ extern __inline float32x4_t | 18216 __extension__ extern __inline float32x4_t |
18218 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18217 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18219 vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, | 18218 vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, |
18220 const int __index) | 18219 const int __index) |
18221 { | 18220 { |
18222 __builtin_arm_lane_check (4, __index); | 18221 __builtin_arm_lane_check (4, __index); |
18223 return __builtin_neon_vfmal_lane_highv4hfv4sf (__r, __a, __b, __index); | 18222 return __builtin_neon_vfmal_lane_highv4hfv4sf (__r, __a, __b, __index); |
18224 } | 18223 } |
18225 | 18224 |
18226 __extension__ extern __inline float32x2_t | 18225 __extension__ extern __inline float32x2_t |
18227 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18226 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18228 vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, | 18227 vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, |
18229 const int __index) | 18228 const int __index) |
18230 { | 18229 { |
18231 __builtin_arm_lane_check (8, __index); | 18230 __builtin_arm_lane_check (8, __index); |
18232 return __builtin_neon_vfmal_lane_highv8hfv2sf (__r, __a, __b, __index); | 18231 return __builtin_neon_vfmal_lane_highv8hfv2sf (__r, __a, __b, __index); |
18233 } | 18232 } |
18234 | 18233 |
18235 __extension__ extern __inline float32x2_t | 18234 __extension__ extern __inline float32x2_t |
18236 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18235 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18237 vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, | 18236 vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, |
18238 const int __index) | 18237 const int __index) |
18239 { | 18238 { |
18240 __builtin_arm_lane_check (4, __index); | 18239 __builtin_arm_lane_check (4, __index); |
18241 return __builtin_neon_vfmsl_lane_lowv2sf (__r, __a, __b, __index); | 18240 return __builtin_neon_vfmsl_lane_lowv2sf (__r, __a, __b, __index); |
18242 } | 18241 } |
18243 | 18242 |
18244 __extension__ extern __inline float32x2_t | 18243 __extension__ extern __inline float32x2_t |
18245 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18244 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18246 vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, | 18245 vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, |
18247 const int __index) | 18246 const int __index) |
18248 { | 18247 { |
18249 __builtin_arm_lane_check (4, __index); | 18248 __builtin_arm_lane_check (4, __index); |
18250 return __builtin_neon_vfmsl_lane_highv2sf (__r, __a, __b, __index); | 18249 return __builtin_neon_vfmsl_lane_highv2sf (__r, __a, __b, __index); |
18251 } | 18250 } |
18252 | 18251 |
18253 __extension__ extern __inline float32x4_t | 18252 __extension__ extern __inline float32x4_t |
18254 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18253 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18255 vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, | 18254 vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, |
18256 const int __index) | 18255 const int __index) |
18257 { | 18256 { |
18258 __builtin_arm_lane_check (8, __index); | 18257 __builtin_arm_lane_check (8, __index); |
18259 return __builtin_neon_vfmsl_lane_lowv4sf (__r, __a, __b, __index); | 18258 return __builtin_neon_vfmsl_lane_lowv4sf (__r, __a, __b, __index); |
18260 } | 18259 } |
18261 | 18260 |
18262 __extension__ extern __inline float32x4_t | 18261 __extension__ extern __inline float32x4_t |
18263 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18262 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18264 vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, | 18263 vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, |
18265 const int __index) | 18264 const int __index) |
18266 { | 18265 { |
18267 __builtin_arm_lane_check (4, __index); | 18266 __builtin_arm_lane_check (4, __index); |
18268 return __builtin_neon_vfmsl_lane_lowv4hfv4sf (__r, __a, __b, __index); | 18267 return __builtin_neon_vfmsl_lane_lowv4hfv4sf (__r, __a, __b, __index); |
18269 } | 18268 } |
18270 | 18269 |
18271 __extension__ extern __inline float32x2_t | 18270 __extension__ extern __inline float32x2_t |
18272 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18271 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18273 vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, | 18272 vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, |
18274 const int __index) | 18273 const int __index) |
18275 { | 18274 { |
18276 __builtin_arm_lane_check (8, __index); | 18275 __builtin_arm_lane_check (8, __index); |
18277 return __builtin_neon_vfmsl_lane_lowv8hfv2sf (__r, __a, __b, __index); | 18276 return __builtin_neon_vfmsl_lane_lowv8hfv2sf (__r, __a, __b, __index); |
18278 } | 18277 } |
18279 | 18278 |
18280 __extension__ extern __inline float32x4_t | 18279 __extension__ extern __inline float32x4_t |
18281 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18280 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18282 vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, | 18281 vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, |
18283 const int __index) | 18282 const int __index) |
18284 { | 18283 { |
18285 __builtin_arm_lane_check (8, __index); | 18284 __builtin_arm_lane_check (8, __index); |
18286 return __builtin_neon_vfmsl_lane_highv4sf (__r, __a, __b, __index); | 18285 return __builtin_neon_vfmsl_lane_highv4sf (__r, __a, __b, __index); |
18287 } | 18286 } |
18288 | 18287 |
18289 __extension__ extern __inline float32x4_t | 18288 __extension__ extern __inline float32x4_t |
18290 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18289 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18291 vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, | 18290 vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, |
18292 const int __index) | 18291 const int __index) |
18293 { | 18292 { |
18294 __builtin_arm_lane_check (4, __index); | 18293 __builtin_arm_lane_check (4, __index); |
18295 return __builtin_neon_vfmsl_lane_highv4hfv4sf (__r, __a, __b, __index); | 18294 return __builtin_neon_vfmsl_lane_highv4hfv4sf (__r, __a, __b, __index); |
18296 } | 18295 } |
18297 | 18296 |
18298 __extension__ extern __inline float32x2_t | 18297 __extension__ extern __inline float32x2_t |
18299 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | 18298 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
18300 vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, | 18299 vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, |
18301 const int __index) | 18300 const int __index) |
18302 { | 18301 { |
18303 __builtin_arm_lane_check (8, __index); | 18302 __builtin_arm_lane_check (8, __index); |
18304 return __builtin_neon_vfmsl_lane_highv8hfv2sf (__r, __a, __b, __index); | 18303 return __builtin_neon_vfmsl_lane_highv8hfv2sf (__r, __a, __b, __index); |
18305 } | 18304 } |
18306 | 18305 |
18307 #pragma GCC pop_options | 18306 #pragma GCC pop_options |
18308 #endif | 18307 #endif |
18309 | 18308 |
18309 /* AdvSIMD Complex numbers intrinsics. */ | |
18310 #if __ARM_ARCH >= 8 | |
18311 #pragma GCC push_options | |
18312 #pragma GCC target ("arch=armv8.3-a") | |
18313 | |
18314 | |
18315 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) | |
18316 #pragma GCC push_options | |
18317 #pragma GCC target ("+fp16") | |
18318 __extension__ extern __inline float16x4_t | |
18319 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18320 vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) | |
18321 { | |
18322 return __builtin_neon_vcadd90v4hf (__a, __b); | |
18323 } | |
18324 | |
18325 __extension__ extern __inline float16x8_t | |
18326 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18327 vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) | |
18328 { | |
18329 return __builtin_neon_vcadd90v8hf (__a, __b); | |
18330 } | |
18331 | |
18332 __extension__ extern __inline float16x4_t | |
18333 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18334 vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) | |
18335 { | |
18336 return __builtin_neon_vcadd270v4hf (__a, __b); /* rot270 variant must use the vcadd270 builtin; vcadd90 is what vcadd_rot90_f16 uses. */ | |
18337 } | |
18338 | |
18339 __extension__ extern __inline float16x8_t | |
18340 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18341 vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) | |
18342 { | |
18343 return __builtin_neon_vcadd270v8hf (__a, __b); /* rot270 variant must use the vcadd270 builtin; vcadd90 is what vcaddq_rot90_f16 uses. */ | |
18344 } | |
18345 | |
18346 __extension__ extern __inline float16x4_t | |
18347 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18348 vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) | |
18349 { | |
18350 return __builtin_neon_vcmla0v4hf (__r, __a, __b); | |
18351 } | |
18352 | |
18353 __extension__ extern __inline float16x8_t | |
18354 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18355 vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) | |
18356 { | |
18357 return __builtin_neon_vcmla0v8hf (__r, __a, __b); | |
18358 } | |
18359 | |
18360 __extension__ extern __inline float16x4_t | |
18361 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18362 vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, | |
18363 const int __index) | |
18364 { | |
18365 return __builtin_neon_vcmla_lane0v4hf (__r, __a, __b, __index); | |
18366 } | |
18367 | |
18368 __extension__ extern __inline float16x4_t | |
18369 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18370 vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, | |
18371 const int __index) | |
18372 { | |
18373 return __builtin_neon_vcmla_laneq0v4hf (__r, __a, __b, __index); | |
18374 } | |
18375 | |
18376 __extension__ extern __inline float16x8_t | |
18377 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18378 vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, | |
18379 const int __index) | |
18380 { | |
18381 return __builtin_neon_vcmlaq_lane0v8hf (__r, __a, __b, __index); | |
18382 } | |
18383 | |
18384 __extension__ extern __inline float16x8_t | |
18385 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18386 vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, | |
18387 const int __index) | |
18388 { | |
18389 return __builtin_neon_vcmla_lane0v8hf (__r, __a, __b, __index); | |
18390 } | |
18391 | |
18392 __extension__ extern __inline float16x4_t | |
18393 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18394 vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) | |
18395 { | |
18396 return __builtin_neon_vcmla90v4hf (__r, __a, __b); | |
18397 } | |
18398 | |
18399 __extension__ extern __inline float16x8_t | |
18400 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18401 vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) | |
18402 { | |
18403 return __builtin_neon_vcmla90v8hf (__r, __a, __b); | |
18404 } | |
18405 | |
18406 __extension__ extern __inline float16x4_t | |
18407 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18408 vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, | |
18409 const int __index) | |
18410 { | |
18411 return __builtin_neon_vcmla_lane90v4hf (__r, __a, __b, __index); | |
18412 } | |
18413 | |
18414 __extension__ extern __inline float16x4_t | |
18415 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18416 vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, | |
18417 const int __index) | |
18418 { | |
18419 return __builtin_neon_vcmla_laneq90v4hf (__r, __a, __b, __index); | |
18420 } | |
18421 | |
18422 __extension__ extern __inline float16x8_t | |
18423 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18424 vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, | |
18425 const int __index) | |
18426 { | |
18427 return __builtin_neon_vcmlaq_lane90v8hf (__r, __a, __b, __index); | |
18428 } | |
18429 | |
18430 __extension__ extern __inline float16x8_t | |
18431 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18432 vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, | |
18433 const int __index) | |
18434 { | |
18435 return __builtin_neon_vcmla_lane90v8hf (__r, __a, __b, __index); | |
18436 } | |
18437 | |
18438 __extension__ extern __inline float16x4_t | |
18439 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18440 vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) | |
18441 { | |
18442 return __builtin_neon_vcmla180v4hf (__r, __a, __b); | |
18443 } | |
18444 | |
18445 __extension__ extern __inline float16x8_t | |
18446 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18447 vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) | |
18448 { | |
18449 return __builtin_neon_vcmla180v8hf (__r, __a, __b); | |
18450 } | |
18451 | |
18452 __extension__ extern __inline float16x4_t | |
18453 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18454 vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, | |
18455 const int __index) | |
18456 { | |
18457 return __builtin_neon_vcmla_lane180v4hf (__r, __a, __b, __index); | |
18458 } | |
18459 | |
18460 __extension__ extern __inline float16x4_t | |
18461 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18462 vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, | |
18463 const int __index) | |
18464 { | |
18465 return __builtin_neon_vcmla_laneq180v4hf (__r, __a, __b, __index); | |
18466 } | |
18467 | |
18468 __extension__ extern __inline float16x8_t | |
18469 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18470 vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, | |
18471 const int __index) | |
18472 { | |
18473 return __builtin_neon_vcmlaq_lane180v8hf (__r, __a, __b, __index); | |
18474 } | |
18475 | |
18476 __extension__ extern __inline float16x8_t | |
18477 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18478 vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, | |
18479 const int __index) | |
18480 { | |
18481 return __builtin_neon_vcmla_lane180v8hf (__r, __a, __b, __index); | |
18482 } | |
18483 | |
18484 __extension__ extern __inline float16x4_t | |
18485 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18486 vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) | |
18487 { | |
18488 return __builtin_neon_vcmla270v4hf (__r, __a, __b); | |
18489 } | |
18490 | |
18491 __extension__ extern __inline float16x8_t | |
18492 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18493 vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) | |
18494 { | |
18495 return __builtin_neon_vcmla270v8hf (__r, __a, __b); | |
18496 } | |
18497 | |
18498 __extension__ extern __inline float16x4_t | |
18499 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18500 vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, | |
18501 const int __index) | |
18502 { | |
18503 return __builtin_neon_vcmla_lane270v4hf (__r, __a, __b, __index); | |
18504 } | |
18505 | |
18506 __extension__ extern __inline float16x4_t | |
18507 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18508 vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, | |
18509 const int __index) | |
18510 { | |
18511 return __builtin_neon_vcmla_laneq270v4hf (__r, __a, __b, __index); | |
18512 } | |
18513 | |
18514 __extension__ extern __inline float16x8_t | |
18515 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18516 vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, | |
18517 const int __index) | |
18518 { | |
18519 return __builtin_neon_vcmlaq_lane270v8hf (__r, __a, __b, __index); | |
18520 } | |
18521 | |
18522 __extension__ extern __inline float16x8_t | |
18523 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18524 vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, | |
18525 const int __index) | |
18526 { | |
18527 return __builtin_neon_vcmla_lane270v8hf (__r, __a, __b, __index); | |
18528 } | |
18529 | |
18530 #pragma GCC pop_options | |
18531 #endif | |
18532 | |
18533 __extension__ extern __inline float32x2_t | |
18534 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18535 vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) | |
18536 { | |
18537 return __builtin_neon_vcadd90v2sf (__a, __b); | |
18538 } | |
18539 | |
18540 __extension__ extern __inline float32x4_t | |
18541 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18542 vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) | |
18543 { | |
18544 return __builtin_neon_vcadd90v4sf (__a, __b); | |
18545 } | |
18546 | |
18547 __extension__ extern __inline float32x2_t | |
18548 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18549 vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) | |
18550 { | |
18551 return __builtin_neon_vcadd90v2sf (__a, __b); | |
18552 } | |
18553 | |
18554 __extension__ extern __inline float32x4_t | |
18555 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18556 vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) | |
18557 { | |
18558 return __builtin_neon_vcadd90v4sf (__a, __b); | |
18559 } | |
18560 | |
18561 __extension__ extern __inline float32x2_t | |
18562 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18563 vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) | |
18564 { | |
18565 return __builtin_neon_vcmla0v2sf (__r, __a, __b); | |
18566 } | |
18567 | |
18568 __extension__ extern __inline float32x4_t | |
18569 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18570 vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) | |
18571 { | |
18572 return __builtin_neon_vcmla0v4sf (__r, __a, __b); | |
18573 } | |
18574 | |
18575 __extension__ extern __inline float32x2_t | |
18576 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18577 vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, | |
18578 const int __index) | |
18579 { | |
18580 return __builtin_neon_vcmla_lane0v2sf (__r, __a, __b, __index); | |
18581 } | |
18582 | |
18583 __extension__ extern __inline float32x2_t | |
18584 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18585 vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, | |
18586 const int __index) | |
18587 { | |
18588 return __builtin_neon_vcmla_laneq0v2sf (__r, __a, __b, __index); | |
18589 } | |
18590 | |
18591 __extension__ extern __inline float32x4_t | |
18592 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18593 vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, | |
18594 const int __index) | |
18595 { | |
18596 return __builtin_neon_vcmlaq_lane0v4sf (__r, __a, __b, __index); | |
18597 } | |
18598 | |
18599 __extension__ extern __inline float32x4_t | |
18600 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18601 vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, | |
18602 const int __index) | |
18603 { | |
18604 return __builtin_neon_vcmla_lane0v4sf (__r, __a, __b, __index); | |
18605 } | |
18606 | |
18607 __extension__ extern __inline float32x2_t | |
18608 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18609 vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) | |
18610 { | |
18611 return __builtin_neon_vcmla90v2sf (__r, __a, __b); | |
18612 } | |
18613 | |
18614 __extension__ extern __inline float32x4_t | |
18615 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18616 vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) | |
18617 { | |
18618 return __builtin_neon_vcmla90v4sf (__r, __a, __b); | |
18619 } | |
18620 | |
18621 __extension__ extern __inline float32x2_t | |
18622 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18623 vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, | |
18624 const int __index) | |
18625 { | |
18626 return __builtin_neon_vcmla_lane90v2sf (__r, __a, __b, __index); | |
18627 } | |
18628 | |
18629 __extension__ extern __inline float32x2_t | |
18630 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18631 vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, | |
18632 const int __index) | |
18633 { | |
18634 return __builtin_neon_vcmla_laneq90v2sf (__r, __a, __b, __index); | |
18635 } | |
18636 | |
18637 __extension__ extern __inline float32x4_t | |
18638 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18639 vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, | |
18640 const int __index) | |
18641 { | |
18642 return __builtin_neon_vcmlaq_lane90v4sf (__r, __a, __b, __index); | |
18643 } | |
18644 | |
18645 __extension__ extern __inline float32x4_t | |
18646 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18647 vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, | |
18648 const int __index) | |
18649 { | |
18650 return __builtin_neon_vcmla_lane90v4sf (__r, __a, __b, __index); | |
18651 } | |
18652 | |
18653 __extension__ extern __inline float32x2_t | |
18654 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18655 vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) | |
18656 { | |
18657 return __builtin_neon_vcmla180v2sf (__r, __a, __b); | |
18658 } | |
18659 | |
18660 __extension__ extern __inline float32x4_t | |
18661 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18662 vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) | |
18663 { | |
18664 return __builtin_neon_vcmla180v4sf (__r, __a, __b); | |
18665 } | |
18666 | |
18667 __extension__ extern __inline float32x2_t | |
18668 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18669 vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, | |
18670 const int __index) | |
18671 { | |
18672 return __builtin_neon_vcmla_lane180v2sf (__r, __a, __b, __index); | |
18673 } | |
18674 | |
18675 __extension__ extern __inline float32x2_t | |
18676 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18677 vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, | |
18678 const int __index) | |
18679 { | |
18680 return __builtin_neon_vcmla_laneq180v2sf (__r, __a, __b, __index); | |
18681 } | |
18682 | |
18683 __extension__ extern __inline float32x4_t | |
18684 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18685 vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, | |
18686 const int __index) | |
18687 { | |
18688 return __builtin_neon_vcmlaq_lane180v4sf (__r, __a, __b, __index); | |
18689 } | |
18690 | |
18691 __extension__ extern __inline float32x4_t | |
18692 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18693 vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, | |
18694 const int __index) | |
18695 { | |
18696 return __builtin_neon_vcmla_lane180v4sf (__r, __a, __b, __index); | |
18697 } | |
18698 | |
18699 __extension__ extern __inline float32x2_t | |
18700 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18701 vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) | |
18702 { | |
18703 return __builtin_neon_vcmla270v2sf (__r, __a, __b); | |
18704 } | |
18705 | |
18706 __extension__ extern __inline float32x4_t | |
18707 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18708 vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) | |
18709 { | |
18710 return __builtin_neon_vcmla270v4sf (__r, __a, __b); | |
18711 } | |
18712 | |
18713 __extension__ extern __inline float32x2_t | |
18714 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18715 vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, | |
18716 const int __index) | |
18717 { | |
18718 return __builtin_neon_vcmla_lane270v2sf (__r, __a, __b, __index); | |
18719 } | |
18720 | |
18721 __extension__ extern __inline float32x2_t | |
18722 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18723 vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, | |
18724 const int __index) | |
18725 { | |
18726 return __builtin_neon_vcmla_laneq270v2sf (__r, __a, __b, __index); | |
18727 } | |
18728 | |
18729 __extension__ extern __inline float32x4_t | |
18730 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18731 vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, | |
18732 const int __index) | |
18733 { | |
18734 return __builtin_neon_vcmlaq_lane270v4sf (__r, __a, __b, __index); | |
18735 } | |
18736 | |
18737 __extension__ extern __inline float32x4_t | |
18738 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18739 vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, | |
18740 const int __index) | |
18741 { | |
18742 return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index); | |
18743 } | |
18744 | |
18745 | |
18746 /* AdvSIMD Matrix Multiply-Accumulate and Dot Product intrinsics. */ | |
18747 #pragma GCC push_options | |
18748 #pragma GCC target ("arch=armv8.2-a+i8mm") | |
18749 | |
18750 __extension__ extern __inline int32x2_t | |
18751 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18752 vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) | |
18753 { | |
18754 return __builtin_neon_usdotv8qi_ssus (__r, __a, __b); | |
18755 } | |
18756 | |
18757 __extension__ extern __inline int32x2_t | |
18758 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18759 vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, | |
18760 int8x8_t __b, const int __index) | |
18761 { | |
18762 return __builtin_neon_usdot_lanev8qi_ssuss (__r, __a, __b, __index); | |
18763 } | |
18764 | |
18765 __extension__ extern __inline int32x4_t | |
18766 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18767 vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, | |
18768 int8x8_t __b, const int __index) | |
18769 { | |
18770 return __builtin_neon_usdot_lanev16qi_ssuss (__r, __a, __b, __index); | |
18771 } | |
18772 | |
18773 __extension__ extern __inline int32x2_t | |
18774 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18775 vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, | |
18776 uint8x8_t __b, const int __index) | |
18777 { | |
18778 return __builtin_neon_sudot_lanev8qi_sssus (__r, __a, __b, __index); | |
18779 } | |
18780 | |
18781 __extension__ extern __inline int32x4_t | |
18782 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) | |
18783 vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, | |
18784 uint8x8_t __b, const int __index) | |
18785 { | |
18786 return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index); | |
18787 } | |
18788 | |
18789 #pragma GCC pop_options | |
18790 | |
18791 #pragma GCC pop_options | |
18792 #endif | |
18793 | |
18310 #ifdef __cplusplus | 18794 #ifdef __cplusplus |
18311 } | 18795 } |
18312 #endif | 18796 #endif |
18313 | 18797 |
18314 #pragma GCC pop_options | 18798 #pragma GCC pop_options |