/* Copyright (C) 2014-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
|
|
23
|
|
24 #ifndef _IMMINTRIN_H_INCLUDED
|
|
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
|
|
26 #endif
|
|
27
|
|
28 #ifndef _AVX512DQINTRIN_H_INCLUDED
|
|
29 #define _AVX512DQINTRIN_H_INCLUDED
|
|
30
|
|
31 #ifndef __AVX512DQ__
|
|
32 #pragma GCC push_options
|
|
33 #pragma GCC target("avx512dq")
|
|
34 #define __DISABLE_AVX512DQ__
|
|
35 #endif /* __AVX512DQ__ */
|
|
36
|
|
37 extern __inline unsigned char
|
|
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
39 _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
|
|
40 {
|
|
41 *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
|
|
42 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
|
|
43 }
|
|
44
|
|
45 extern __inline unsigned char
|
|
46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
47 _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
|
|
48 {
|
|
49 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
|
|
50 }
|
|
51
|
|
52 extern __inline unsigned char
|
|
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
54 _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
|
|
55 {
|
|
56 return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
|
|
57 }
|
|
58
|
|
59 extern __inline unsigned char
|
|
60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
61 _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
|
|
62 {
|
|
63 *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
|
|
64 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
|
|
65 }
|
|
66
|
|
67 extern __inline unsigned char
|
|
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
69 _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
|
|
70 {
|
|
71 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
|
|
72 }
|
|
73
|
|
74 extern __inline unsigned char
|
|
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
76 _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
|
|
77 {
|
|
78 return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
|
|
79 }
|
|
80
|
|
81 extern __inline unsigned char
|
|
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
83 _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
|
|
84 {
|
|
85 *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
|
|
86 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
|
|
87 }
|
|
88
|
|
89 extern __inline unsigned char
|
|
90 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
91 _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
|
|
92 {
|
|
93 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
|
|
94 }
|
|
95
|
|
96 extern __inline unsigned char
|
|
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
98 _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
|
|
99 {
|
|
100 return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
|
|
101 }
|
|
102
|
|
103 extern __inline __mmask8
|
|
104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
105 _kadd_mask8 (__mmask8 __A, __mmask8 __B)
|
|
106 {
|
|
107 return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
|
|
108 }
|
|
109
|
|
110 extern __inline __mmask16
|
|
111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
112 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
|
|
113 {
|
|
114 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
|
|
115 }
|
|
116
|
|
117 extern __inline unsigned int
|
|
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
119 _cvtmask8_u32 (__mmask8 __A)
|
|
120 {
|
|
121 return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
|
|
122 }
|
|
123
|
|
124 extern __inline __mmask8
|
|
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
126 _cvtu32_mask8 (unsigned int __A)
|
|
127 {
|
|
128 return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
|
|
129 }
|
|
130
|
|
131 extern __inline __mmask8
|
|
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
133 _load_mask8 (__mmask8 *__A)
|
|
134 {
|
|
135 return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
|
|
136 }
|
|
137
|
|
138 extern __inline void
|
|
139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
140 _store_mask8 (__mmask8 *__A, __mmask8 __B)
|
|
141 {
|
|
142 *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
|
|
143 }
|
|
144
|
|
145 extern __inline __mmask8
|
|
146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
147 _knot_mask8 (__mmask8 __A)
|
|
148 {
|
|
149 return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
|
|
150 }
|
|
151
|
|
152 extern __inline __mmask8
|
|
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
154 _kor_mask8 (__mmask8 __A, __mmask8 __B)
|
|
155 {
|
|
156 return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
|
|
157 }
|
|
158
|
|
159 extern __inline __mmask8
|
|
160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
161 _kxnor_mask8 (__mmask8 __A, __mmask8 __B)
|
|
162 {
|
|
163 return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
|
|
164 }
|
|
165
|
|
166 extern __inline __mmask8
|
|
167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
168 _kxor_mask8 (__mmask8 __A, __mmask8 __B)
|
|
169 {
|
|
170 return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
|
|
171 }
|
|
172
|
|
173 extern __inline __mmask8
|
|
174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
175 _kand_mask8 (__mmask8 __A, __mmask8 __B)
|
|
176 {
|
|
177 return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
|
|
178 }
|
|
179
|
|
180 extern __inline __mmask8
|
|
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
182 _kandn_mask8 (__mmask8 __A, __mmask8 __B)
|
|
183 {
|
|
184 return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
|
|
185 }
|
|
186
|
|
187 extern __inline __m512d
|
|
188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
189 _mm512_broadcast_f64x2 (__m128d __A)
|
|
190 {
|
|
191 return (__m512d)
|
|
192 __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
|
|
193 _mm512_undefined_pd (),
|
|
194 (__mmask8) -1);
|
|
195 }
|
|
196
|
|
197 extern __inline __m512d
|
|
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
199 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
|
|
200 {
|
|
201 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
|
|
202 __A,
|
|
203 (__v8df)
|
|
204 __O, __M);
|
|
205 }
|
|
206
|
|
207 extern __inline __m512d
|
|
208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
209 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
|
|
210 {
|
|
211 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
|
|
212 __A,
|
|
213 (__v8df)
|
|
214 _mm512_setzero_ps (),
|
|
215 __M);
|
|
216 }
|
|
217
|
|
218 extern __inline __m512i
|
|
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
220 _mm512_broadcast_i64x2 (__m128i __A)
|
|
221 {
|
|
222 return (__m512i)
|
|
223 __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
|
|
224 _mm512_undefined_epi32 (),
|
|
225 (__mmask8) -1);
|
|
226 }
|
|
227
|
|
228 extern __inline __m512i
|
|
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
230 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
|
|
231 {
|
|
232 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
|
|
233 __A,
|
|
234 (__v8di)
|
|
235 __O, __M);
|
|
236 }
|
|
237
|
|
238 extern __inline __m512i
|
|
239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
240 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
|
|
241 {
|
|
242 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
|
|
243 __A,
|
|
244 (__v8di)
|
|
245 _mm512_setzero_si512 (),
|
|
246 __M);
|
|
247 }
|
|
248
|
|
249 extern __inline __m512
|
|
250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
251 _mm512_broadcast_f32x2 (__m128 __A)
|
|
252 {
|
|
253 return (__m512)
|
|
254 __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
|
255 (__v16sf)_mm512_undefined_ps (),
|
|
256 (__mmask16) -1);
|
|
257 }
|
|
258
|
|
259 extern __inline __m512
|
|
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
261 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
|
|
262 {
|
|
263 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
|
264 (__v16sf)
|
|
265 __O, __M);
|
|
266 }
|
|
267
|
|
268 extern __inline __m512
|
|
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
270 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
|
|
271 {
|
|
272 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
|
273 (__v16sf)
|
|
274 _mm512_setzero_ps (),
|
|
275 __M);
|
|
276 }
|
|
277
|
|
278 extern __inline __m512i
|
|
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
280 _mm512_broadcast_i32x2 (__m128i __A)
|
|
281 {
|
|
282 return (__m512i)
|
|
283 __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
|
|
284 (__v16si)
|
|
285 _mm512_undefined_epi32 (),
|
|
286 (__mmask16) -1);
|
|
287 }
|
|
288
|
|
289 extern __inline __m512i
|
|
290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
291 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
|
|
292 {
|
|
293 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
|
|
294 __A,
|
|
295 (__v16si)
|
|
296 __O, __M);
|
|
297 }
|
|
298
|
|
299 extern __inline __m512i
|
|
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
301 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
|
|
302 {
|
|
303 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
|
|
304 __A,
|
|
305 (__v16si)
|
|
306 _mm512_setzero_si512 (),
|
|
307 __M);
|
|
308 }
|
|
309
|
|
310 extern __inline __m512
|
|
311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
312 _mm512_broadcast_f32x8 (__m256 __A)
|
|
313 {
|
|
314 return (__m512)
|
|
315 __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
|
|
316 _mm512_undefined_ps (),
|
|
317 (__mmask16) -1);
|
|
318 }
|
|
319
|
|
320 extern __inline __m512
|
|
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
322 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
|
|
323 {
|
|
324 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
|
|
325 (__v16sf)__O,
|
|
326 __M);
|
|
327 }
|
|
328
|
|
329 extern __inline __m512
|
|
330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
331 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
|
|
332 {
|
|
333 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
|
|
334 (__v16sf)
|
|
335 _mm512_setzero_ps (),
|
|
336 __M);
|
|
337 }
|
|
338
|
|
339 extern __inline __m512i
|
|
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
341 _mm512_broadcast_i32x8 (__m256i __A)
|
|
342 {
|
|
343 return (__m512i)
|
|
344 __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
|
|
345 (__v16si)
|
|
346 _mm512_undefined_epi32 (),
|
|
347 (__mmask16) -1);
|
|
348 }
|
|
349
|
|
350 extern __inline __m512i
|
|
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
352 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
|
|
353 {
|
|
354 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
|
|
355 __A,
|
|
356 (__v16si)__O,
|
|
357 __M);
|
|
358 }
|
|
359
|
|
360 extern __inline __m512i
|
|
361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
362 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
|
|
363 {
|
|
364 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
|
|
365 __A,
|
|
366 (__v16si)
|
|
367 _mm512_setzero_si512 (),
|
|
368 __M);
|
|
369 }
|
|
370
|
|
371 extern __inline __m512i
|
|
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
373 _mm512_mullo_epi64 (__m512i __A, __m512i __B)
|
|
374 {
|
|
375 return (__m512i) ((__v8du) __A * (__v8du) __B);
|
|
376 }
|
|
377
|
|
378 extern __inline __m512i
|
|
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
380 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
|
|
381 __m512i __B)
|
|
382 {
|
|
383 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
|
|
384 (__v8di) __B,
|
|
385 (__v8di) __W,
|
|
386 (__mmask8) __U);
|
|
387 }
|
|
388
|
|
389 extern __inline __m512i
|
|
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
391 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
|
|
392 {
|
|
393 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
|
|
394 (__v8di) __B,
|
|
395 (__v8di)
|
|
396 _mm512_setzero_si512 (),
|
|
397 (__mmask8) __U);
|
|
398 }
|
|
399
|
|
400 extern __inline __m512d
|
|
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
402 _mm512_xor_pd (__m512d __A, __m512d __B)
|
|
403 {
|
|
404 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
|
|
405 (__v8df) __B,
|
|
406 (__v8df)
|
|
407 _mm512_setzero_pd (),
|
|
408 (__mmask8) -1);
|
|
409 }
|
|
410
|
|
411 extern __inline __m512d
|
|
412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
413 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
|
|
414 __m512d __B)
|
|
415 {
|
|
416 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
|
|
417 (__v8df) __B,
|
|
418 (__v8df) __W,
|
|
419 (__mmask8) __U);
|
|
420 }
|
|
421
|
|
422 extern __inline __m512d
|
|
423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
424 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
|
|
425 {
|
|
426 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
|
|
427 (__v8df) __B,
|
|
428 (__v8df)
|
|
429 _mm512_setzero_pd (),
|
|
430 (__mmask8) __U);
|
|
431 }
|
|
432
|
|
433 extern __inline __m512
|
|
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
435 _mm512_xor_ps (__m512 __A, __m512 __B)
|
|
436 {
|
|
437 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
|
|
438 (__v16sf) __B,
|
|
439 (__v16sf)
|
|
440 _mm512_setzero_ps (),
|
|
441 (__mmask16) -1);
|
|
442 }
|
|
443
|
|
444 extern __inline __m512
|
|
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
446 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
|
|
447 {
|
|
448 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
|
|
449 (__v16sf) __B,
|
|
450 (__v16sf) __W,
|
|
451 (__mmask16) __U);
|
|
452 }
|
|
453
|
|
454 extern __inline __m512
|
|
455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
456 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
|
457 {
|
|
458 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
|
|
459 (__v16sf) __B,
|
|
460 (__v16sf)
|
|
461 _mm512_setzero_ps (),
|
|
462 (__mmask16) __U);
|
|
463 }
|
|
464
|
|
465 extern __inline __m512d
|
|
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
467 _mm512_or_pd (__m512d __A, __m512d __B)
|
|
468 {
|
|
469 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
|
|
470 (__v8df) __B,
|
|
471 (__v8df)
|
|
472 _mm512_setzero_pd (),
|
|
473 (__mmask8) -1);
|
|
474 }
|
|
475
|
|
476 extern __inline __m512d
|
|
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
478 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
|
|
479 {
|
|
480 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
|
|
481 (__v8df) __B,
|
|
482 (__v8df) __W,
|
|
483 (__mmask8) __U);
|
|
484 }
|
|
485
|
|
486 extern __inline __m512d
|
|
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
488 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
|
|
489 {
|
|
490 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
|
|
491 (__v8df) __B,
|
|
492 (__v8df)
|
|
493 _mm512_setzero_pd (),
|
|
494 (__mmask8) __U);
|
|
495 }
|
|
496
|
|
497 extern __inline __m512
|
|
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
499 _mm512_or_ps (__m512 __A, __m512 __B)
|
|
500 {
|
|
501 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
|
|
502 (__v16sf) __B,
|
|
503 (__v16sf)
|
|
504 _mm512_setzero_ps (),
|
|
505 (__mmask16) -1);
|
|
506 }
|
|
507
|
|
508 extern __inline __m512
|
|
509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
510 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
|
|
511 {
|
|
512 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
|
|
513 (__v16sf) __B,
|
|
514 (__v16sf) __W,
|
|
515 (__mmask16) __U);
|
|
516 }
|
|
517
|
|
518 extern __inline __m512
|
|
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
520 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
|
521 {
|
|
522 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
|
|
523 (__v16sf) __B,
|
|
524 (__v16sf)
|
|
525 _mm512_setzero_ps (),
|
|
526 (__mmask16) __U);
|
|
527 }
|
|
528
|
|
529 extern __inline __m512d
|
|
530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
531 _mm512_and_pd (__m512d __A, __m512d __B)
|
|
532 {
|
|
533 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
|
|
534 (__v8df) __B,
|
|
535 (__v8df)
|
|
536 _mm512_setzero_pd (),
|
|
537 (__mmask8) -1);
|
|
538 }
|
|
539
|
|
540 extern __inline __m512d
|
|
541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
542 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
|
|
543 __m512d __B)
|
|
544 {
|
|
545 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
|
|
546 (__v8df) __B,
|
|
547 (__v8df) __W,
|
|
548 (__mmask8) __U);
|
|
549 }
|
|
550
|
|
551 extern __inline __m512d
|
|
552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
553 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
|
|
554 {
|
|
555 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
|
|
556 (__v8df) __B,
|
|
557 (__v8df)
|
|
558 _mm512_setzero_pd (),
|
|
559 (__mmask8) __U);
|
|
560 }
|
|
561
|
|
562 extern __inline __m512
|
|
563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
564 _mm512_and_ps (__m512 __A, __m512 __B)
|
|
565 {
|
|
566 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
|
|
567 (__v16sf) __B,
|
|
568 (__v16sf)
|
|
569 _mm512_setzero_ps (),
|
|
570 (__mmask16) -1);
|
|
571 }
|
|
572
|
|
573 extern __inline __m512
|
|
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
575 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
|
|
576 {
|
|
577 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
|
|
578 (__v16sf) __B,
|
|
579 (__v16sf) __W,
|
|
580 (__mmask16) __U);
|
|
581 }
|
|
582
|
|
583 extern __inline __m512
|
|
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
585 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
|
586 {
|
|
587 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
|
|
588 (__v16sf) __B,
|
|
589 (__v16sf)
|
|
590 _mm512_setzero_ps (),
|
|
591 (__mmask16) __U);
|
|
592 }
|
|
593
|
|
594 extern __inline __m512d
|
|
595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
596 _mm512_andnot_pd (__m512d __A, __m512d __B)
|
|
597 {
|
|
598 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
|
|
599 (__v8df) __B,
|
|
600 (__v8df)
|
|
601 _mm512_setzero_pd (),
|
|
602 (__mmask8) -1);
|
|
603 }
|
|
604
|
|
605 extern __inline __m512d
|
|
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
607 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
|
|
608 __m512d __B)
|
|
609 {
|
|
610 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
|
|
611 (__v8df) __B,
|
|
612 (__v8df) __W,
|
|
613 (__mmask8) __U);
|
|
614 }
|
|
615
|
|
616 extern __inline __m512d
|
|
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
618 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
|
|
619 {
|
|
620 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
|
|
621 (__v8df) __B,
|
|
622 (__v8df)
|
|
623 _mm512_setzero_pd (),
|
|
624 (__mmask8) __U);
|
|
625 }
|
|
626
|
|
627 extern __inline __m512
|
|
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
629 _mm512_andnot_ps (__m512 __A, __m512 __B)
|
|
630 {
|
|
631 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
|
|
632 (__v16sf) __B,
|
|
633 (__v16sf)
|
|
634 _mm512_setzero_ps (),
|
|
635 (__mmask16) -1);
|
|
636 }
|
|
637
|
|
638 extern __inline __m512
|
|
639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
640 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
|
|
641 __m512 __B)
|
|
642 {
|
|
643 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
|
|
644 (__v16sf) __B,
|
|
645 (__v16sf) __W,
|
|
646 (__mmask16) __U);
|
|
647 }
|
|
648
|
|
649 extern __inline __m512
|
|
650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
651 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
|
652 {
|
|
653 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
|
|
654 (__v16sf) __B,
|
|
655 (__v16sf)
|
|
656 _mm512_setzero_ps (),
|
|
657 (__mmask16) __U);
|
|
658 }
|
|
659
|
|
660 extern __inline __mmask16
|
|
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
662 _mm512_movepi32_mask (__m512i __A)
|
|
663 {
|
|
664 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
|
|
665 }
|
|
666
|
|
667 extern __inline __mmask8
|
|
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
669 _mm512_movepi64_mask (__m512i __A)
|
|
670 {
|
|
671 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
|
|
672 }
|
|
673
|
|
674 extern __inline __m512i
|
|
675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
676 _mm512_movm_epi32 (__mmask16 __A)
|
|
677 {
|
|
678 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
|
|
679 }
|
|
680
|
|
681 extern __inline __m512i
|
|
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
683 _mm512_movm_epi64 (__mmask8 __A)
|
|
684 {
|
|
685 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
|
|
686 }
|
|
687
|
|
688 extern __inline __m512i
|
|
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
690 _mm512_cvttpd_epi64 (__m512d __A)
|
|
691 {
|
|
692 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
693 (__v8di)
|
|
694 _mm512_setzero_si512 (),
|
|
695 (__mmask8) -1,
|
|
696 _MM_FROUND_CUR_DIRECTION);
|
|
697 }
|
|
698
|
|
699 extern __inline __m512i
|
|
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
701 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
|
|
702 {
|
|
703 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
704 (__v8di) __W,
|
|
705 (__mmask8) __U,
|
|
706 _MM_FROUND_CUR_DIRECTION);
|
|
707 }
|
|
708
|
|
709 extern __inline __m512i
|
|
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
711 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
|
|
712 {
|
|
713 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
714 (__v8di)
|
|
715 _mm512_setzero_si512 (),
|
|
716 (__mmask8) __U,
|
|
717 _MM_FROUND_CUR_DIRECTION);
|
|
718 }
|
|
719
|
|
720 extern __inline __m512i
|
|
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
722 _mm512_cvttpd_epu64 (__m512d __A)
|
|
723 {
|
|
724 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
725 (__v8di)
|
|
726 _mm512_setzero_si512 (),
|
|
727 (__mmask8) -1,
|
|
728 _MM_FROUND_CUR_DIRECTION);
|
|
729 }
|
|
730
|
|
731 extern __inline __m512i
|
|
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
733 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
|
|
734 {
|
|
735 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
736 (__v8di) __W,
|
|
737 (__mmask8) __U,
|
|
738 _MM_FROUND_CUR_DIRECTION);
|
|
739 }
|
|
740
|
|
741 extern __inline __m512i
|
|
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
743 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
|
|
744 {
|
|
745 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
746 (__v8di)
|
|
747 _mm512_setzero_si512 (),
|
|
748 (__mmask8) __U,
|
|
749 _MM_FROUND_CUR_DIRECTION);
|
|
750 }
|
|
751
|
|
752 extern __inline __m512i
|
|
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
754 _mm512_cvttps_epi64 (__m256 __A)
|
|
755 {
|
|
756 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
757 (__v8di)
|
|
758 _mm512_setzero_si512 (),
|
|
759 (__mmask8) -1,
|
|
760 _MM_FROUND_CUR_DIRECTION);
|
|
761 }
|
|
762
|
|
763 extern __inline __m512i
|
|
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
765 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
|
|
766 {
|
|
767 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
768 (__v8di) __W,
|
|
769 (__mmask8) __U,
|
|
770 _MM_FROUND_CUR_DIRECTION);
|
|
771 }
|
|
772
|
|
773 extern __inline __m512i
|
|
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
775 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
|
|
776 {
|
|
777 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
778 (__v8di)
|
|
779 _mm512_setzero_si512 (),
|
|
780 (__mmask8) __U,
|
|
781 _MM_FROUND_CUR_DIRECTION);
|
|
782 }
|
|
783
|
|
784 extern __inline __m512i
|
|
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
786 _mm512_cvttps_epu64 (__m256 __A)
|
|
787 {
|
|
788 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
789 (__v8di)
|
|
790 _mm512_setzero_si512 (),
|
|
791 (__mmask8) -1,
|
|
792 _MM_FROUND_CUR_DIRECTION);
|
|
793 }
|
|
794
|
|
795 extern __inline __m512i
|
|
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
797 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
|
|
798 {
|
|
799 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
800 (__v8di) __W,
|
|
801 (__mmask8) __U,
|
|
802 _MM_FROUND_CUR_DIRECTION);
|
|
803 }
|
|
804
|
|
805 extern __inline __m512i
|
|
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
807 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
|
|
808 {
|
|
809 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
810 (__v8di)
|
|
811 _mm512_setzero_si512 (),
|
|
812 (__mmask8) __U,
|
|
813 _MM_FROUND_CUR_DIRECTION);
|
|
814 }
|
|
815
|
|
816 extern __inline __m512i
|
|
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
818 _mm512_cvtpd_epi64 (__m512d __A)
|
|
819 {
|
|
820 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
821 (__v8di)
|
|
822 _mm512_setzero_si512 (),
|
|
823 (__mmask8) -1,
|
|
824 _MM_FROUND_CUR_DIRECTION);
|
|
825 }
|
|
826
|
|
827 extern __inline __m512i
|
|
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
829 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
|
|
830 {
|
|
831 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
832 (__v8di) __W,
|
|
833 (__mmask8) __U,
|
|
834 _MM_FROUND_CUR_DIRECTION);
|
|
835 }
|
|
836
|
|
837 extern __inline __m512i
|
|
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
839 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
|
|
840 {
|
|
841 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
842 (__v8di)
|
|
843 _mm512_setzero_si512 (),
|
|
844 (__mmask8) __U,
|
|
845 _MM_FROUND_CUR_DIRECTION);
|
|
846 }
|
|
847
|
|
848 extern __inline __m512i
|
|
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
850 _mm512_cvtpd_epu64 (__m512d __A)
|
|
851 {
|
|
852 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
853 (__v8di)
|
|
854 _mm512_setzero_si512 (),
|
|
855 (__mmask8) -1,
|
|
856 _MM_FROUND_CUR_DIRECTION);
|
|
857 }
|
|
858
|
|
859 extern __inline __m512i
|
|
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
861 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
|
|
862 {
|
|
863 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
864 (__v8di) __W,
|
|
865 (__mmask8) __U,
|
|
866 _MM_FROUND_CUR_DIRECTION);
|
|
867 }
|
|
868
|
|
869 extern __inline __m512i
|
|
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
871 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
|
|
872 {
|
|
873 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
874 (__v8di)
|
|
875 _mm512_setzero_si512 (),
|
|
876 (__mmask8) __U,
|
|
877 _MM_FROUND_CUR_DIRECTION);
|
|
878 }
|
|
879
|
|
880 extern __inline __m512i
|
|
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
882 _mm512_cvtps_epi64 (__m256 __A)
|
|
883 {
|
|
884 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
885 (__v8di)
|
|
886 _mm512_setzero_si512 (),
|
|
887 (__mmask8) -1,
|
|
888 _MM_FROUND_CUR_DIRECTION);
|
|
889 }
|
|
890
|
|
891 extern __inline __m512i
|
|
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
893 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
|
|
894 {
|
|
895 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
896 (__v8di) __W,
|
|
897 (__mmask8) __U,
|
|
898 _MM_FROUND_CUR_DIRECTION);
|
|
899 }
|
|
900
|
|
901 extern __inline __m512i
|
|
902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
903 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
|
|
904 {
|
|
905 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
906 (__v8di)
|
|
907 _mm512_setzero_si512 (),
|
|
908 (__mmask8) __U,
|
|
909 _MM_FROUND_CUR_DIRECTION);
|
|
910 }
|
|
911
|
|
912 extern __inline __m512i
|
|
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
914 _mm512_cvtps_epu64 (__m256 __A)
|
|
915 {
|
|
916 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
917 (__v8di)
|
|
918 _mm512_setzero_si512 (),
|
|
919 (__mmask8) -1,
|
|
920 _MM_FROUND_CUR_DIRECTION);
|
|
921 }
|
|
922
|
|
923 extern __inline __m512i
|
|
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
925 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
|
|
926 {
|
|
927 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
928 (__v8di) __W,
|
|
929 (__mmask8) __U,
|
|
930 _MM_FROUND_CUR_DIRECTION);
|
|
931 }
|
|
932
|
|
933 extern __inline __m512i
|
|
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
935 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
|
|
936 {
|
|
937 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
938 (__v8di)
|
|
939 _mm512_setzero_si512 (),
|
|
940 (__mmask8) __U,
|
|
941 _MM_FROUND_CUR_DIRECTION);
|
|
942 }
|
|
943
|
|
944 extern __inline __m256
|
|
945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
946 _mm512_cvtepi64_ps (__m512i __A)
|
|
947 {
|
|
948 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
949 (__v8sf)
|
|
950 _mm256_setzero_ps (),
|
|
951 (__mmask8) -1,
|
|
952 _MM_FROUND_CUR_DIRECTION);
|
|
953 }
|
|
954
|
|
955 extern __inline __m256
|
|
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
957 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
|
|
958 {
|
|
959 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
960 (__v8sf) __W,
|
|
961 (__mmask8) __U,
|
|
962 _MM_FROUND_CUR_DIRECTION);
|
|
963 }
|
|
964
|
|
965 extern __inline __m256
|
|
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
967 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
|
|
968 {
|
|
969 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
970 (__v8sf)
|
|
971 _mm256_setzero_ps (),
|
|
972 (__mmask8) __U,
|
|
973 _MM_FROUND_CUR_DIRECTION);
|
|
974 }
|
|
975
|
|
976 extern __inline __m256
|
|
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
978 _mm512_cvtepu64_ps (__m512i __A)
|
|
979 {
|
|
980 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
981 (__v8sf)
|
|
982 _mm256_setzero_ps (),
|
|
983 (__mmask8) -1,
|
|
984 _MM_FROUND_CUR_DIRECTION);
|
|
985 }
|
|
986
|
|
987 extern __inline __m256
|
|
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
989 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
|
|
990 {
|
|
991 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
992 (__v8sf) __W,
|
|
993 (__mmask8) __U,
|
|
994 _MM_FROUND_CUR_DIRECTION);
|
|
995 }
|
|
996
|
|
997 extern __inline __m256
|
|
998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
999 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
|
|
1000 {
|
|
1001 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
1002 (__v8sf)
|
|
1003 _mm256_setzero_ps (),
|
|
1004 (__mmask8) __U,
|
|
1005 _MM_FROUND_CUR_DIRECTION);
|
|
1006 }
|
|
1007
|
|
1008 extern __inline __m512d
|
|
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1010 _mm512_cvtepi64_pd (__m512i __A)
|
|
1011 {
|
|
1012 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1013 (__v8df)
|
|
1014 _mm512_setzero_pd (),
|
|
1015 (__mmask8) -1,
|
|
1016 _MM_FROUND_CUR_DIRECTION);
|
|
1017 }
|
|
1018
|
|
1019 extern __inline __m512d
|
|
1020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1021 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
|
|
1022 {
|
|
1023 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1024 (__v8df) __W,
|
|
1025 (__mmask8) __U,
|
|
1026 _MM_FROUND_CUR_DIRECTION);
|
|
1027 }
|
|
1028
|
|
1029 extern __inline __m512d
|
|
1030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1031 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
|
|
1032 {
|
|
1033 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1034 (__v8df)
|
|
1035 _mm512_setzero_pd (),
|
|
1036 (__mmask8) __U,
|
|
1037 _MM_FROUND_CUR_DIRECTION);
|
|
1038 }
|
|
1039
|
|
1040 extern __inline __m512d
|
|
1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1042 _mm512_cvtepu64_pd (__m512i __A)
|
|
1043 {
|
|
1044 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1045 (__v8df)
|
|
1046 _mm512_setzero_pd (),
|
|
1047 (__mmask8) -1,
|
|
1048 _MM_FROUND_CUR_DIRECTION);
|
|
1049 }
|
|
1050
|
|
1051 extern __inline __m512d
|
|
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1053 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
|
|
1054 {
|
|
1055 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1056 (__v8df) __W,
|
|
1057 (__mmask8) __U,
|
|
1058 _MM_FROUND_CUR_DIRECTION);
|
|
1059 }
|
|
1060
|
|
1061 extern __inline __m512d
|
|
1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1063 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
|
|
1064 {
|
|
1065 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1066 (__v8df)
|
|
1067 _mm512_setzero_pd (),
|
|
1068 (__mmask8) __U,
|
|
1069 _MM_FROUND_CUR_DIRECTION);
|
|
1070 }
|
|
1071
|
|
1072 #ifdef __OPTIMIZE__
|
|
1073 extern __inline __mmask8
|
|
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1075 _kshiftli_mask8 (__mmask8 __A, unsigned int __B)
|
|
1076 {
|
|
1077 return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
|
|
1078 }
|
|
1079
|
|
1080 extern __inline __mmask8
|
|
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1082 _kshiftri_mask8 (__mmask8 __A, unsigned int __B)
|
|
1083 {
|
|
1084 return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
|
|
1085 }
|
|
1086
|
|
1087 extern __inline __m512d
|
|
1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1089 _mm512_range_pd (__m512d __A, __m512d __B, int __C)
|
|
1090 {
|
|
1091 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
1092 (__v8df) __B, __C,
|
|
1093 (__v8df)
|
|
1094 _mm512_setzero_pd (),
|
|
1095 (__mmask8) -1,
|
|
1096 _MM_FROUND_CUR_DIRECTION);
|
|
1097 }
|
|
1098
|
|
1099 extern __inline __m512d
|
|
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1101 _mm512_mask_range_pd (__m512d __W, __mmask8 __U,
|
|
1102 __m512d __A, __m512d __B, int __C)
|
|
1103 {
|
|
1104 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
1105 (__v8df) __B, __C,
|
|
1106 (__v8df) __W,
|
|
1107 (__mmask8) __U,
|
|
1108 _MM_FROUND_CUR_DIRECTION);
|
|
1109 }
|
|
1110
|
|
1111 extern __inline __m512d
|
|
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1113 _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
|
|
1114 {
|
|
1115 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
1116 (__v8df) __B, __C,
|
|
1117 (__v8df)
|
|
1118 _mm512_setzero_pd (),
|
|
1119 (__mmask8) __U,
|
|
1120 _MM_FROUND_CUR_DIRECTION);
|
|
1121 }
|
|
1122
|
|
1123 extern __inline __m512
|
|
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1125 _mm512_range_ps (__m512 __A, __m512 __B, int __C)
|
|
1126 {
|
|
1127 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
1128 (__v16sf) __B, __C,
|
|
1129 (__v16sf)
|
|
1130 _mm512_setzero_ps (),
|
|
1131 (__mmask16) -1,
|
|
1132 _MM_FROUND_CUR_DIRECTION);
|
|
1133 }
|
|
1134
|
|
1135 extern __inline __m512
|
|
1136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1137 _mm512_mask_range_ps (__m512 __W, __mmask16 __U,
|
|
1138 __m512 __A, __m512 __B, int __C)
|
|
1139 {
|
|
1140 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
1141 (__v16sf) __B, __C,
|
|
1142 (__v16sf) __W,
|
|
1143 (__mmask16) __U,
|
|
1144 _MM_FROUND_CUR_DIRECTION);
|
|
1145 }
|
|
1146
|
|
1147 extern __inline __m512
|
|
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1149 _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
|
|
1150 {
|
|
1151 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
1152 (__v16sf) __B, __C,
|
|
1153 (__v16sf)
|
|
1154 _mm512_setzero_ps (),
|
|
1155 (__mmask16) __U,
|
|
1156 _MM_FROUND_CUR_DIRECTION);
|
|
1157 }
|
|
1158
|
|
1159 extern __inline __m128d
|
|
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1161 _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
|
|
1162 {
|
|
1163 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
|
|
1164 (__v2df) __B, __C,
|
|
1165 (__v2df) _mm_setzero_pd (),
|
|
1166 (__mmask8) -1);
|
|
1167 }
|
|
1168
|
|
1169 extern __inline __m128d
|
|
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1171 _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
|
1172 __m128d __B, int __C)
|
|
1173 {
|
|
1174 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
|
|
1175 (__v2df) __B, __C,
|
|
1176 (__v2df) __W,
|
|
1177 (__mmask8) __U);
|
|
1178 }
|
|
1179
|
|
1180 extern __inline __m128d
|
|
1181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1182 _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
|
|
1183 {
|
|
1184 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
|
|
1185 (__v2df) __B, __C,
|
|
1186 (__v2df) _mm_setzero_pd (),
|
|
1187 (__mmask8) __U);
|
|
1188 }
|
|
1189
|
|
1190 extern __inline __m128
|
|
1191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1192 _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
|
|
1193 {
|
|
1194 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
|
|
1195 (__v4sf) __B, __C,
|
|
1196 (__v4sf) _mm_setzero_ps (),
|
|
1197 (__mmask8) -1);
|
|
1198 }
|
|
1199
|
|
1200
|
|
1201 extern __inline __m128
|
|
1202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1203 _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
|
1204 __m128 __B, int __C)
|
|
1205 {
|
|
1206 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
|
|
1207 (__v4sf) __B, __C,
|
|
1208 (__v4sf) __W,
|
|
1209 (__mmask8) __U);
|
|
1210 }
|
|
1211
|
|
1212 extern __inline __m128
|
|
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1214 _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
|
|
1215 {
|
|
1216 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
|
|
1217 (__v4sf) __B, __C,
|
|
1218 (__v4sf) _mm_setzero_ps (),
|
|
1219 (__mmask8) __U);
|
|
1220 }
|
|
1221
|
|
1222 extern __inline __m128d
|
|
1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1224 _mm_range_sd (__m128d __A, __m128d __B, int __C)
|
|
1225 {
|
131
|
1226 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
|
1227 (__v2df) __B, __C,
|
|
1228 (__v2df)
|
|
1229 _mm_setzero_pd (),
|
|
1230 (__mmask8) -1,
|
|
1231 _MM_FROUND_CUR_DIRECTION);
|
|
1232 }
|
|
1233
|
|
1234 extern __inline __m128d
|
|
1235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1236 _mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
|
|
1237 {
|
|
1238 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
|
1239 (__v2df) __B, __C,
|
|
1240 (__v2df) __W,
|
|
1241 (__mmask8) __U,
|
|
1242 _MM_FROUND_CUR_DIRECTION);
|
|
1243 }
|
|
1244
|
|
1245 extern __inline __m128d
|
|
1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1247 _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
|
|
1248 {
|
|
1249 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
111
|
1250 (__v2df) __B, __C,
|
131
|
1251 (__v2df)
|
|
1252 _mm_setzero_pd (),
|
|
1253 (__mmask8) __U,
|
111
|
1254 _MM_FROUND_CUR_DIRECTION);
|
|
1255 }
|
|
1256
|
131
|
1257 extern __inline __m128
|
|
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1259 _mm_range_ss (__m128 __A, __m128 __B, int __C)
|
|
1260 {
|
|
1261 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
|
1262 (__v4sf) __B, __C,
|
|
1263 (__v4sf)
|
|
1264 _mm_setzero_ps (),
|
|
1265 (__mmask8) -1,
|
|
1266 _MM_FROUND_CUR_DIRECTION);
|
|
1267 }
|
|
1268
|
|
1269 extern __inline __m128
|
|
1270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1271 _mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
|
|
1272 {
|
|
1273 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
|
1274 (__v4sf) __B, __C,
|
|
1275 (__v4sf) __W,
|
|
1276 (__mmask8) __U,
|
|
1277 _MM_FROUND_CUR_DIRECTION);
|
|
1278 }
|
|
1279
|
111
|
1280
|
|
1281 extern __inline __m128
|
|
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
131
|
1283 _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
|
111
|
1284 {
|
131
|
1285 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
111
|
1286 (__v4sf) __B, __C,
|
131
|
1287 (__v4sf)
|
|
1288 _mm_setzero_ps (),
|
|
1289 (__mmask8) __U,
|
111
|
1290 _MM_FROUND_CUR_DIRECTION);
|
|
1291 }
|
|
1292
|
|
1293 extern __inline __m128d
|
|
1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1295 _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
|
|
1296 {
|
131
|
1297 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
|
1298 (__v2df) __B, __C,
|
|
1299 (__v2df)
|
|
1300 _mm_setzero_pd (),
|
|
1301 (__mmask8) -1, __R);
|
|
1302 }
|
|
1303
|
|
1304 extern __inline __m128d
|
|
1305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1306 _mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
|
|
1307 int __C, const int __R)
|
|
1308 {
|
|
1309 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
111
|
1310 (__v2df) __B, __C,
|
131
|
1311 (__v2df) __W,
|
|
1312 (__mmask8) __U, __R);
|
|
1313 }
|
|
1314
|
|
1315 extern __inline __m128d
|
|
1316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1317 _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
|
|
1318 const int __R)
|
|
1319 {
|
|
1320 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
|
|
1321 (__v2df) __B, __C,
|
|
1322 (__v2df)
|
|
1323 _mm_setzero_pd (),
|
|
1324 (__mmask8) __U, __R);
|
111
|
1325 }
|
|
1326
|
|
1327 extern __inline __m128
|
|
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1329 _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
|
|
1330 {
|
131
|
1331 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
|
1332 (__v4sf) __B, __C,
|
|
1333 (__v4sf)
|
|
1334 _mm_setzero_ps (),
|
|
1335 (__mmask8) -1, __R);
|
|
1336 }
|
|
1337
|
|
1338 extern __inline __m128
|
|
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1340 _mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
|
|
1341 int __C, const int __R)
|
|
1342 {
|
|
1343 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
111
|
1344 (__v4sf) __B, __C,
|
131
|
1345 (__v4sf) __W,
|
|
1346 (__mmask8) __U, __R);
|
|
1347 }
|
|
1348
|
|
1349 extern __inline __m128
|
|
1350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1351 _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
|
|
1352 const int __R)
|
|
1353 {
|
|
1354 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
|
|
1355 (__v4sf) __B, __C,
|
|
1356 (__v4sf)
|
|
1357 _mm_setzero_ps (),
|
|
1358 (__mmask8) __U, __R);
|
111
|
1359 }
|
|
1360
|
|
1361 extern __inline __mmask8
|
|
1362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1363 _mm_fpclass_ss_mask (__m128 __A, const int __imm)
|
|
1364 {
|
|
1365 return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
|
|
1366 }
|
|
1367
|
|
1368 extern __inline __mmask8
|
|
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1370 _mm_fpclass_sd_mask (__m128d __A, const int __imm)
|
|
1371 {
|
|
1372 return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
|
|
1373 }
|
|
1374
|
|
1375 extern __inline __m512i
|
|
1376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1377 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
|
|
1378 {
|
|
1379 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
1380 (__v8di)
|
|
1381 _mm512_setzero_si512 (),
|
|
1382 (__mmask8) -1,
|
|
1383 __R);
|
|
1384 }
|
|
1385
|
|
1386 extern __inline __m512i
|
|
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1388 _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
|
|
1389 const int __R)
|
|
1390 {
|
|
1391 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
1392 (__v8di) __W,
|
|
1393 (__mmask8) __U,
|
|
1394 __R);
|
|
1395 }
|
|
1396
|
|
1397 extern __inline __m512i
|
|
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1399 _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
|
|
1400 const int __R)
|
|
1401 {
|
|
1402 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
|
1403 (__v8di)
|
|
1404 _mm512_setzero_si512 (),
|
|
1405 (__mmask8) __U,
|
|
1406 __R);
|
|
1407 }
|
|
1408
|
|
1409 extern __inline __m512i
|
|
1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1411 _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
|
|
1412 {
|
|
1413 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
1414 (__v8di)
|
|
1415 _mm512_setzero_si512 (),
|
|
1416 (__mmask8) -1,
|
|
1417 __R);
|
|
1418 }
|
|
1419
|
|
1420 extern __inline __m512i
|
|
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1422 _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
|
|
1423 const int __R)
|
|
1424 {
|
|
1425 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
1426 (__v8di) __W,
|
|
1427 (__mmask8) __U,
|
|
1428 __R);
|
|
1429 }
|
|
1430
|
|
1431 extern __inline __m512i
|
|
1432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1433 _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
|
|
1434 const int __R)
|
|
1435 {
|
|
1436 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
|
1437 (__v8di)
|
|
1438 _mm512_setzero_si512 (),
|
|
1439 (__mmask8) __U,
|
|
1440 __R);
|
|
1441 }
|
|
1442
|
|
1443 extern __inline __m512i
|
|
1444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1445 _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
|
|
1446 {
|
|
1447 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
1448 (__v8di)
|
|
1449 _mm512_setzero_si512 (),
|
|
1450 (__mmask8) -1,
|
|
1451 __R);
|
|
1452 }
|
|
1453
|
|
1454 extern __inline __m512i
|
|
1455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1456 _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
|
|
1457 const int __R)
|
|
1458 {
|
|
1459 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
1460 (__v8di) __W,
|
|
1461 (__mmask8) __U,
|
|
1462 __R);
|
|
1463 }
|
|
1464
|
|
1465 extern __inline __m512i
|
|
1466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1467 _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
|
|
1468 const int __R)
|
|
1469 {
|
|
1470 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
|
1471 (__v8di)
|
|
1472 _mm512_setzero_si512 (),
|
|
1473 (__mmask8) __U,
|
|
1474 __R);
|
|
1475 }
|
|
1476
|
|
1477 extern __inline __m512i
|
|
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1479 _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
|
|
1480 {
|
|
1481 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
1482 (__v8di)
|
|
1483 _mm512_setzero_si512 (),
|
|
1484 (__mmask8) -1,
|
|
1485 __R);
|
|
1486 }
|
|
1487
|
|
1488 extern __inline __m512i
|
|
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1490 _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
|
|
1491 const int __R)
|
|
1492 {
|
|
1493 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
1494 (__v8di) __W,
|
|
1495 (__mmask8) __U,
|
|
1496 __R);
|
|
1497 }
|
|
1498
|
|
1499 extern __inline __m512i
|
|
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1501 _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
|
|
1502 const int __R)
|
|
1503 {
|
|
1504 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
|
1505 (__v8di)
|
|
1506 _mm512_setzero_si512 (),
|
|
1507 (__mmask8) __U,
|
|
1508 __R);
|
|
1509 }
|
|
1510
|
|
1511 extern __inline __m512i
|
|
1512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1513 _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
|
|
1514 {
|
|
1515 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
1516 (__v8di)
|
|
1517 _mm512_setzero_si512 (),
|
|
1518 (__mmask8) -1,
|
|
1519 __R);
|
|
1520 }
|
|
1521
|
|
1522 extern __inline __m512i
|
|
1523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1524 _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
|
|
1525 const int __R)
|
|
1526 {
|
|
1527 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
1528 (__v8di) __W,
|
|
1529 (__mmask8) __U,
|
|
1530 __R);
|
|
1531 }
|
|
1532
|
|
1533 extern __inline __m512i
|
|
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1535 _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
|
|
1536 const int __R)
|
|
1537 {
|
|
1538 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
|
1539 (__v8di)
|
|
1540 _mm512_setzero_si512 (),
|
|
1541 (__mmask8) __U,
|
|
1542 __R);
|
|
1543 }
|
|
1544
|
|
1545 extern __inline __m512i
|
|
1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1547 _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
|
|
1548 {
|
|
1549 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
1550 (__v8di)
|
|
1551 _mm512_setzero_si512 (),
|
|
1552 (__mmask8) -1,
|
|
1553 __R);
|
|
1554 }
|
|
1555
|
|
1556 extern __inline __m512i
|
|
1557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1558 _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
|
|
1559 const int __R)
|
|
1560 {
|
|
1561 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
1562 (__v8di) __W,
|
|
1563 (__mmask8) __U,
|
|
1564 __R);
|
|
1565 }
|
|
1566
|
|
1567 extern __inline __m512i
|
|
1568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1569 _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
|
|
1570 const int __R)
|
|
1571 {
|
|
1572 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
|
1573 (__v8di)
|
|
1574 _mm512_setzero_si512 (),
|
|
1575 (__mmask8) __U,
|
|
1576 __R);
|
|
1577 }
|
|
1578
|
|
1579 extern __inline __m512i
|
|
1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1581 _mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
|
|
1582 {
|
|
1583 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
1584 (__v8di)
|
|
1585 _mm512_setzero_si512 (),
|
|
1586 (__mmask8) -1,
|
|
1587 __R);
|
|
1588 }
|
|
1589
|
|
1590 extern __inline __m512i
|
|
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1592 _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
|
|
1593 const int __R)
|
|
1594 {
|
|
1595 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
1596 (__v8di) __W,
|
|
1597 (__mmask8) __U,
|
|
1598 __R);
|
|
1599 }
|
|
1600
|
|
1601 extern __inline __m512i
|
|
1602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1603 _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
|
|
1604 const int __R)
|
|
1605 {
|
|
1606 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
|
1607 (__v8di)
|
|
1608 _mm512_setzero_si512 (),
|
|
1609 (__mmask8) __U,
|
|
1610 __R);
|
|
1611 }
|
|
1612
|
|
1613 extern __inline __m512i
|
|
1614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1615 _mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
|
|
1616 {
|
|
1617 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
1618 (__v8di)
|
|
1619 _mm512_setzero_si512 (),
|
|
1620 (__mmask8) -1,
|
|
1621 __R);
|
|
1622 }
|
|
1623
|
|
1624 extern __inline __m512i
|
|
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1626 _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
|
|
1627 const int __R)
|
|
1628 {
|
|
1629 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
1630 (__v8di) __W,
|
|
1631 (__mmask8) __U,
|
|
1632 __R);
|
|
1633 }
|
|
1634
|
|
1635 extern __inline __m512i
|
|
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1637 _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
|
|
1638 const int __R)
|
|
1639 {
|
|
1640 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
|
1641 (__v8di)
|
|
1642 _mm512_setzero_si512 (),
|
|
1643 (__mmask8) __U,
|
|
1644 __R);
|
|
1645 }
|
|
1646
|
|
1647 extern __inline __m256
|
|
1648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1649 _mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
|
|
1650 {
|
|
1651 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
1652 (__v8sf)
|
|
1653 _mm256_setzero_ps (),
|
|
1654 (__mmask8) -1,
|
|
1655 __R);
|
|
1656 }
|
|
1657
|
|
1658 extern __inline __m256
|
|
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1660 _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
|
|
1661 const int __R)
|
|
1662 {
|
|
1663 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
1664 (__v8sf) __W,
|
|
1665 (__mmask8) __U,
|
|
1666 __R);
|
|
1667 }
|
|
1668
|
|
1669 extern __inline __m256
|
|
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1671 _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
|
|
1672 const int __R)
|
|
1673 {
|
|
1674 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
|
1675 (__v8sf)
|
|
1676 _mm256_setzero_ps (),
|
|
1677 (__mmask8) __U,
|
|
1678 __R);
|
|
1679 }
|
|
1680
|
|
1681 extern __inline __m256
|
|
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1683 _mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
|
|
1684 {
|
|
1685 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
1686 (__v8sf)
|
|
1687 _mm256_setzero_ps (),
|
|
1688 (__mmask8) -1,
|
|
1689 __R);
|
|
1690 }
|
|
1691
|
|
1692 extern __inline __m256
|
|
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1694 _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
|
|
1695 const int __R)
|
|
1696 {
|
|
1697 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
1698 (__v8sf) __W,
|
|
1699 (__mmask8) __U,
|
|
1700 __R);
|
|
1701 }
|
|
1702
|
|
1703 extern __inline __m256
|
|
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1705 _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
|
|
1706 const int __R)
|
|
1707 {
|
|
1708 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
|
1709 (__v8sf)
|
|
1710 _mm256_setzero_ps (),
|
|
1711 (__mmask8) __U,
|
|
1712 __R);
|
|
1713 }
|
|
1714
|
|
1715 extern __inline __m512d
|
|
1716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1717 _mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
|
|
1718 {
|
|
1719 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1720 (__v8df)
|
|
1721 _mm512_setzero_pd (),
|
|
1722 (__mmask8) -1,
|
|
1723 __R);
|
|
1724 }
|
|
1725
|
|
1726 extern __inline __m512d
|
|
1727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1728 _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
|
|
1729 const int __R)
|
|
1730 {
|
|
1731 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1732 (__v8df) __W,
|
|
1733 (__mmask8) __U,
|
|
1734 __R);
|
|
1735 }
|
|
1736
|
|
1737 extern __inline __m512d
|
|
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1739 _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
|
|
1740 const int __R)
|
|
1741 {
|
|
1742 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
|
|
1743 (__v8df)
|
|
1744 _mm512_setzero_pd (),
|
|
1745 (__mmask8) __U,
|
|
1746 __R);
|
|
1747 }
|
|
1748
|
|
1749 extern __inline __m512d
|
|
1750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1751 _mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
|
|
1752 {
|
|
1753 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1754 (__v8df)
|
|
1755 _mm512_setzero_pd (),
|
|
1756 (__mmask8) -1,
|
|
1757 __R);
|
|
1758 }
|
|
1759
|
|
1760 extern __inline __m512d
|
|
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1762 _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
|
|
1763 const int __R)
|
|
1764 {
|
|
1765 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1766 (__v8df) __W,
|
|
1767 (__mmask8) __U,
|
|
1768 __R);
|
|
1769 }
|
|
1770
|
|
1771 extern __inline __m512d
|
|
1772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1773 _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
|
|
1774 const int __R)
|
|
1775 {
|
|
1776 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
|
|
1777 (__v8df)
|
|
1778 _mm512_setzero_pd (),
|
|
1779 (__mmask8) __U,
|
|
1780 __R);
|
|
1781 }
|
|
1782
|
|
1783 extern __inline __m512d
|
|
1784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1785 _mm512_reduce_pd (__m512d __A, int __B)
|
|
1786 {
|
|
1787 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
|
|
1788 (__v8df)
|
|
1789 _mm512_setzero_pd (),
|
|
1790 (__mmask8) -1);
|
|
1791 }
|
|
1792
|
|
1793 extern __inline __m512d
|
|
1794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1795 _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
|
|
1796 {
|
|
1797 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
|
|
1798 (__v8df) __W,
|
|
1799 (__mmask8) __U);
|
|
1800 }
|
|
1801
|
|
1802 extern __inline __m512d
|
|
1803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1804 _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
|
|
1805 {
|
|
1806 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
|
|
1807 (__v8df)
|
|
1808 _mm512_setzero_pd (),
|
|
1809 (__mmask8) __U);
|
|
1810 }
|
|
1811
|
|
1812 extern __inline __m512
|
|
1813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1814 _mm512_reduce_ps (__m512 __A, int __B)
|
|
1815 {
|
|
1816 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
|
|
1817 (__v16sf)
|
|
1818 _mm512_setzero_ps (),
|
|
1819 (__mmask16) -1);
|
|
1820 }
|
|
1821
|
|
1822 extern __inline __m512
|
|
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1824 _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
|
|
1825 {
|
|
1826 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
|
|
1827 (__v16sf) __W,
|
|
1828 (__mmask16) __U);
|
|
1829 }
|
|
1830
|
|
1831 extern __inline __m512
|
|
1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1833 _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
|
|
1834 {
|
|
1835 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
|
|
1836 (__v16sf)
|
|
1837 _mm512_setzero_ps (),
|
|
1838 (__mmask16) __U);
|
|
1839 }
|
|
1840
|
|
1841 extern __inline __m256
|
|
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1843 _mm512_extractf32x8_ps (__m512 __A, const int __imm)
|
|
1844 {
|
|
1845 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
|
|
1846 __imm,
|
|
1847 (__v8sf)
|
|
1848 _mm256_setzero_ps (),
|
|
1849 (__mmask8) -1);
|
|
1850 }
|
|
1851
|
|
1852 extern __inline __m256
|
|
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1854 _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
|
|
1855 const int __imm)
|
|
1856 {
|
|
1857 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
|
|
1858 __imm,
|
|
1859 (__v8sf) __W,
|
|
1860 (__mmask8) __U);
|
|
1861 }
|
|
1862
|
|
1863 extern __inline __m256
|
|
1864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1865 _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
|
|
1866 const int __imm)
|
|
1867 {
|
|
1868 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
|
|
1869 __imm,
|
|
1870 (__v8sf)
|
|
1871 _mm256_setzero_ps (),
|
|
1872 (__mmask8) __U);
|
|
1873 }
|
|
1874
|
|
1875 extern __inline __m128d
|
|
1876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1877 _mm512_extractf64x2_pd (__m512d __A, const int __imm)
|
|
1878 {
|
|
1879 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
|
|
1880 __imm,
|
|
1881 (__v2df)
|
|
1882 _mm_setzero_pd (),
|
|
1883 (__mmask8) -1);
|
|
1884 }
|
|
1885
|
|
1886 extern __inline __m128d
|
|
1887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1888 _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
|
|
1889 const int __imm)
|
|
1890 {
|
|
1891 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
|
|
1892 __imm,
|
|
1893 (__v2df) __W,
|
|
1894 (__mmask8)
|
|
1895 __U);
|
|
1896 }
|
|
1897
|
|
1898 extern __inline __m128d
|
|
1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1900 _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
|
|
1901 const int __imm)
|
|
1902 {
|
|
1903 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
|
|
1904 __imm,
|
|
1905 (__v2df)
|
|
1906 _mm_setzero_pd (),
|
|
1907 (__mmask8)
|
|
1908 __U);
|
|
1909 }
|
|
1910
|
|
1911 extern __inline __m256i
|
|
1912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1913 _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
|
|
1914 {
|
|
1915 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
|
|
1916 __imm,
|
|
1917 (__v8si)
|
|
1918 _mm256_setzero_si256 (),
|
|
1919 (__mmask8) -1);
|
|
1920 }
|
|
1921
|
|
1922 extern __inline __m256i
|
|
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1924 _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
|
|
1925 const int __imm)
|
|
1926 {
|
|
1927 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
|
|
1928 __imm,
|
|
1929 (__v8si) __W,
|
|
1930 (__mmask8) __U);
|
|
1931 }
|
|
1932
|
|
1933 extern __inline __m256i
|
|
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1935 _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
|
|
1936 const int __imm)
|
|
1937 {
|
|
1938 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
|
|
1939 __imm,
|
|
1940 (__v8si)
|
|
1941 _mm256_setzero_si256 (),
|
|
1942 (__mmask8) __U);
|
|
1943 }
|
|
1944
|
|
1945 extern __inline __m128i
|
|
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1947 _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
|
|
1948 {
|
|
1949 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
|
|
1950 __imm,
|
|
1951 (__v2di)
|
|
1952 _mm_setzero_si128 (),
|
|
1953 (__mmask8) -1);
|
|
1954 }
|
|
1955
|
|
1956 extern __inline __m128i
|
|
1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1958 _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
|
|
1959 const int __imm)
|
|
1960 {
|
|
1961 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
|
|
1962 __imm,
|
|
1963 (__v2di) __W,
|
|
1964 (__mmask8)
|
|
1965 __U);
|
|
1966 }
|
|
1967
|
|
1968 extern __inline __m128i
|
|
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1970 _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
|
|
1971 const int __imm)
|
|
1972 {
|
|
1973 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
|
|
1974 __imm,
|
|
1975 (__v2di)
|
|
1976 _mm_setzero_si128 (),
|
|
1977 (__mmask8)
|
|
1978 __U);
|
|
1979 }
|
|
1980
|
|
1981 extern __inline __m512d
|
|
1982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1983 _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
|
|
1984 const int __R)
|
|
1985 {
|
|
1986 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
1987 (__v8df) __B, __C,
|
|
1988 (__v8df)
|
|
1989 _mm512_setzero_pd (),
|
|
1990 (__mmask8) -1,
|
|
1991 __R);
|
|
1992 }
|
|
1993
|
|
1994 extern __inline __m512d
|
|
1995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
1996 _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
|
|
1997 __m512d __A, __m512d __B, int __C,
|
|
1998 const int __R)
|
|
1999 {
|
|
2000 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
2001 (__v8df) __B, __C,
|
|
2002 (__v8df) __W,
|
|
2003 (__mmask8) __U,
|
|
2004 __R);
|
|
2005 }
|
|
2006
|
|
2007 extern __inline __m512d
|
|
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2009 _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
|
|
2010 int __C, const int __R)
|
|
2011 {
|
|
2012 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
|
|
2013 (__v8df) __B, __C,
|
|
2014 (__v8df)
|
|
2015 _mm512_setzero_pd (),
|
|
2016 (__mmask8) __U,
|
|
2017 __R);
|
|
2018 }
|
|
2019
|
|
2020 extern __inline __m512
|
|
2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2022 _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
|
|
2023 {
|
|
2024 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
2025 (__v16sf) __B, __C,
|
|
2026 (__v16sf)
|
|
2027 _mm512_setzero_ps (),
|
|
2028 (__mmask16) -1,
|
|
2029 __R);
|
|
2030 }
|
|
2031
|
|
2032 extern __inline __m512
|
|
2033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2034 _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
|
|
2035 __m512 __A, __m512 __B, int __C,
|
|
2036 const int __R)
|
|
2037 {
|
|
2038 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
2039 (__v16sf) __B, __C,
|
|
2040 (__v16sf) __W,
|
|
2041 (__mmask16) __U,
|
|
2042 __R);
|
|
2043 }
|
|
2044
|
|
2045 extern __inline __m512
|
|
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2047 _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
|
|
2048 int __C, const int __R)
|
|
2049 {
|
|
2050 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
|
|
2051 (__v16sf) __B, __C,
|
|
2052 (__v16sf)
|
|
2053 _mm512_setzero_ps (),
|
|
2054 (__mmask16) __U,
|
|
2055 __R);
|
|
2056 }
|
|
2057
|
|
2058 extern __inline __m512i
|
|
2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2060 _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
|
|
2061 {
|
|
2062 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
|
|
2063 (__v8si) __B,
|
|
2064 __imm,
|
|
2065 (__v16si)
|
|
2066 _mm512_setzero_si512 (),
|
|
2067 (__mmask16) -1);
|
|
2068 }
|
|
2069
|
|
2070 extern __inline __m512i
|
|
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2072 _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
|
|
2073 __m256i __B, const int __imm)
|
|
2074 {
|
|
2075 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
|
|
2076 (__v8si) __B,
|
|
2077 __imm,
|
|
2078 (__v16si) __W,
|
|
2079 (__mmask16) __U);
|
|
2080 }
|
|
2081
|
|
2082 extern __inline __m512i
|
|
2083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2084 _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
|
|
2085 const int __imm)
|
|
2086 {
|
|
2087 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
|
|
2088 (__v8si) __B,
|
|
2089 __imm,
|
|
2090 (__v16si)
|
|
2091 _mm512_setzero_si512 (),
|
|
2092 (__mmask16) __U);
|
|
2093 }
|
|
2094
|
|
2095 extern __inline __m512
|
|
2096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2097 _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
|
|
2098 {
|
|
2099 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
|
|
2100 (__v8sf) __B,
|
|
2101 __imm,
|
|
2102 (__v16sf)
|
|
2103 _mm512_setzero_ps (),
|
|
2104 (__mmask16) -1);
|
|
2105 }
|
|
2106
|
|
2107 extern __inline __m512
|
|
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2109 _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
|
|
2110 __m256 __B, const int __imm)
|
|
2111 {
|
|
2112 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
|
|
2113 (__v8sf) __B,
|
|
2114 __imm,
|
|
2115 (__v16sf) __W,
|
|
2116 (__mmask16) __U);
|
|
2117 }
|
|
2118
|
|
2119 extern __inline __m512
|
|
2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2121 _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
|
|
2122 const int __imm)
|
|
2123 {
|
|
2124 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
|
|
2125 (__v8sf) __B,
|
|
2126 __imm,
|
|
2127 (__v16sf)
|
|
2128 _mm512_setzero_ps (),
|
|
2129 (__mmask16) __U);
|
|
2130 }
|
|
2131
|
|
2132 extern __inline __m512i
|
|
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2134 _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
|
|
2135 {
|
|
2136 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
|
|
2137 (__v2di) __B,
|
|
2138 __imm,
|
|
2139 (__v8di)
|
|
2140 _mm512_setzero_si512 (),
|
|
2141 (__mmask8) -1);
|
|
2142 }
|
|
2143
|
|
2144 extern __inline __m512i
|
|
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2146 _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
|
|
2147 __m128i __B, const int __imm)
|
|
2148 {
|
|
2149 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
|
|
2150 (__v2di) __B,
|
|
2151 __imm,
|
|
2152 (__v8di) __W,
|
|
2153 (__mmask8)
|
|
2154 __U);
|
|
2155 }
|
|
2156
|
|
2157 extern __inline __m512i
|
|
2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2159 _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
|
|
2160 const int __imm)
|
|
2161 {
|
|
2162 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
|
|
2163 (__v2di) __B,
|
|
2164 __imm,
|
|
2165 (__v8di)
|
|
2166 _mm512_setzero_si512 (),
|
|
2167 (__mmask8)
|
|
2168 __U);
|
|
2169 }
|
|
2170
|
|
2171 extern __inline __m512d
|
|
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2173 _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
|
|
2174 {
|
|
2175 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
|
|
2176 (__v2df) __B,
|
|
2177 __imm,
|
|
2178 (__v8df)
|
|
2179 _mm512_setzero_pd (),
|
|
2180 (__mmask8) -1);
|
|
2181 }
|
|
2182
|
|
2183 extern __inline __m512d
|
|
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2185 _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
|
|
2186 __m128d __B, const int __imm)
|
|
2187 {
|
|
2188 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
|
|
2189 (__v2df) __B,
|
|
2190 __imm,
|
|
2191 (__v8df) __W,
|
|
2192 (__mmask8)
|
|
2193 __U);
|
|
2194 }
|
|
2195
|
|
2196 extern __inline __m512d
|
|
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2198 _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
|
|
2199 const int __imm)
|
|
2200 {
|
|
2201 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
|
|
2202 (__v2df) __B,
|
|
2203 __imm,
|
|
2204 (__v8df)
|
|
2205 _mm512_setzero_pd (),
|
|
2206 (__mmask8)
|
|
2207 __U);
|
|
2208 }
|
|
2209
|
|
2210 extern __inline __mmask8
|
|
2211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2212 _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
|
|
2213 const int __imm)
|
|
2214 {
|
|
2215 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
|
|
2216 __imm, __U);
|
|
2217 }
|
|
2218
|
|
2219 extern __inline __mmask8
|
|
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2221 _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
|
|
2222 {
|
|
2223 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
|
|
2224 __imm,
|
|
2225 (__mmask8) -1);
|
|
2226 }
|
|
2227
|
|
2228 extern __inline __mmask16
|
|
2229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2230 _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
|
|
2231 const int __imm)
|
|
2232 {
|
|
2233 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
|
|
2234 __imm, __U);
|
|
2235 }
|
|
2236
|
|
2237 extern __inline __mmask16
|
|
2238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
2239 _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
|
|
2240 {
|
|
2241 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
|
|
2242 __imm,
|
|
2243 (__mmask16) -1);
|
|
2244 }
|
|
2245
|
|
2246 #else
|
|
/* Macro forms of the 8-bit mask shifts.  Both operands are cast to
   __mmask8 before reaching the kshiftliqi / kshiftriqi builtins, and the
   result is cast to __mmask8.  */
#define _kshiftli_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) (X), \
                                         (__mmask8) (Y)))

#define _kshiftri_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) (X), \
                                         (__mmask8) (Y)))
|
|
2252
|
131
|
/* Macro forms of the scalar-double range intrinsics.  Each forwards A, B
   and the immediate C to __builtin_ia32_rangesd128_mask_round with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form supplies a
   zero vector and an all-ones mask, the mask form W and U, the maskz form
   a zero vector and U.  */
#define _mm_range_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) _mm_setzero_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) (__m128d) (W), (__mmask8) (U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), _MM_FROUND_CUR_DIRECTION))
|
111
|
2267
|
|
/* Macro forms of the scalar-single range intrinsics.  Each forwards A, B
   and the immediate C to __builtin_ia32_rangess128_mask_round with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form supplies a
   zero vector and an all-ones mask, the mask form W and U, the maskz form
   a zero vector and U.  */
#define _mm_range_ss(A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) (__m128) (W), (__mmask8) (U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), _MM_FROUND_CUR_DIRECTION))
|
111
|
2282
|
131
|
/* Macro forms of the scalar-double range intrinsics that take an explicit
   last operand R.  Each forwards A, B, the immediate C and R to
   __builtin_ia32_rangesd128_mask_round.  The plain form supplies a zero
   vector and an all-ones mask, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) _mm_setzero_pd (), (__mmask8) -1, (R)))

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) (__m128d) (W), (__mmask8) (U), (R)))

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), (R)))
|
111
|
2297
|
|
/* Macro forms of the scalar-single range intrinsics that take an explicit
   last operand R.  Each forwards A, B, the immediate C and R to
   __builtin_ia32_rangess128_mask_round.  The plain form supplies a zero
   vector and an all-ones mask, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) -1, (R)))

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) (__m128) (W), (__mmask8) (U), (R)))

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), (R)))
|
111
|
2312
|
|
/* Macro wrappers around __builtin_ia32_cvttpd2qq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvtt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2323
|
|
/* Macro wrappers around __builtin_ia32_cvttpd2uqq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvtt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2332
|
|
/* Macro wrappers around __builtin_ia32_cvttps2qq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvtt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2341
|
|
/* Macro wrappers around __builtin_ia32_cvttps2uqq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvtt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2350
|
|
/* Macro wrappers around __builtin_ia32_cvtpd2qq512_mask.  A is the source
   and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2359
|
|
/* Macro wrappers around __builtin_ia32_cvtpd2uqq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2368
|
|
/* Macro wrappers around __builtin_ia32_cvtps2qq512_mask.  A is the source
   and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2377
|
|
/* Macro wrappers around __builtin_ia32_cvtps2uqq512_mask.  A is the
   source and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U, the maskz form a
   zero vector and U.  */
#define _mm512_cvt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
|
|
2386
|
|
/* Macro wrappers around __builtin_ia32_cvtqq2ps512_mask.  A is cast to
   __v8di and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero 256-bit vector and -1, the mask form W and U (both
   passed without a cast), the maskz form a zero vector and U.  */
#define _mm512_cvt_roundepi64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di) (A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di) (A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
|
|
2395
|
|
/* Macro wrappers around __builtin_ia32_cvtuqq2ps512_mask.  A is cast to
   __v8di and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero 256-bit vector and -1, the mask form W and U (both
   passed without a cast), the maskz form a zero vector and U.  */
#define _mm512_cvt_roundepu64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di) (A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di) (A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
|
|
2404
|
|
/* Macro wrappers around __builtin_ia32_cvtqq2pd512_mask.  A is cast to
   __v8di and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U (both passed
   without a cast), the maskz form a zero vector and U.  */
#define _mm512_cvt_roundepi64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di) (A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di) (A), (__v8df) _mm512_setzero_pd (), (U), (B)))
|
|
2413
|
|
/* Macro wrappers around __builtin_ia32_cvtuqq2pd512_mask.  A is cast to
   __v8di and B is forwarded unchanged as the last operand.  The plain form
   supplies a zero vector and -1, the mask form W and U (both passed
   without a cast), the maskz form a zero vector and U.  */
#define _mm512_cvt_roundepu64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di) (A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di) (A), (__v8df) _mm512_setzero_pd (), (U), (B)))
|
|
2422
|
|
/* Macro forms of the packed-double reduce intrinsics.  Each forwards A
   and the immediate B to __builtin_ia32_reducepd512_mask.  The plain form
   supplies a zero vector and an all-ones __mmask8, the mask form W and U,
   the maskz form a zero vector and U.  */
#define _mm512_reduce_pd(A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df) (__m512d) (A), (int) (B), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) -1))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df) (__m512d) (A), (int) (B), \
     (__v8df) (__m512d) (W), (__mmask8) (U)))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df) (__m512d) (A), (int) (B), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) (U)))
|
|
2434
|
|
/* Macro forms of the packed-single reduce intrinsics.  Each forwards A
   and the immediate B to __builtin_ia32_reduceps512_mask.  The plain form
   supplies a zero vector and an all-ones __mmask16, the mask form W and U,
   the maskz form a zero vector and U.  */
#define _mm512_reduce_ps(A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf) (__m512) (A), (int) (B), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) -1))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf) (__m512) (A), (int) (B), \
     (__v16sf) (__m512) (W), (__mmask16) (U)))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf) (__m512) (A), (int) (B), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) (U)))
|
|
2446
|
|
/* Macro forms of the f32x8 extract intrinsics.  Each forwards X and the
   immediate C to __builtin_ia32_extractf32x8_mask.  The plain form
   supplies a zero 256-bit vector and an all-ones __mmask8, the mask form
   W and U, the maskz form a zero vector and U.  */
#define _mm512_extractf32x8_ps(X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v8sf) (__m256) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), (__mmask8) (U)))
|
|
2458
|
|
/* Macro forms of the f64x2 extract intrinsics.  Each forwards X and the
   immediate C to __builtin_ia32_extractf64x2_512_mask.  The plain form
   supplies a zero 128-bit vector and an all-ones __mmask8, the mask form
   W and U, the maskz form a zero vector and U.  */
#define _mm512_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v2df) (__m128d) _mm_setzero_pd (), (__mmask8) -1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v2df) (__m128d) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v2df) (__m128d) _mm_setzero_pd (), (__mmask8) (U)))
|
|
2470
|
|
/* Macro forms of the i32x8 extract intrinsics.  Each forwards X and the
   immediate C to __builtin_ia32_extracti32x8_mask.  The plain form
   supplies a zero 256-bit vector and an all-ones __mmask8, the mask form
   W and U, the maskz form a zero vector and U.  */
#define _mm512_extracti32x8_epi32(X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), (__mmask8) -1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v8si) (__m256i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
|
|
2482
|
|
/* Macro forms of the i64x2 extract intrinsics.  Each forwards X and the
   immediate C to __builtin_ia32_extracti64x2_512_mask.  The plain form
   supplies a zero 128-bit vector and an all-ones __mmask8, the mask form
   W and U, the maskz form a zero vector and U.  */
#define _mm512_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), (__mmask8) -1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v2di) (__m128i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
|
|
2494
|
|
/* Macro forms of the packed-double range intrinsics.  Each forwards A, B
   and the immediate C to __builtin_ia32_rangepd512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form supplies a
   zero vector and an all-ones __mmask8, the mask form W and U, the maskz
   form a zero vector and U.  */
#define _mm512_range_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) (__m512d) (W), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
|
|
2509
|
|
/* Macro forms of the packed-single range intrinsics.  Each forwards A, B
   and the immediate C to __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form supplies a
   zero vector and an all-ones __mmask16, the mask form W and U, the maskz
   form a zero vector and U.  */
#define _mm512_range_ps(A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) (__m512) (W), (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
|
|
2524
|
|
/* Macro forms of the packed-double range intrinsics that take an explicit
   last operand R.  Each forwards A, B, the immediate C and R to
   __builtin_ia32_rangepd512_mask.  The plain form supplies a zero vector
   and an all-ones __mmask8, the mask form W and U, the maskz form a zero
   vector and U.  */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) (__m512d) (W), (__mmask8) (U), (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
|
|
2539
|
|
/* Macro forms of the packed-single range intrinsics that take an explicit
   last operand R.  Each forwards A, B, the immediate C and R to
   __builtin_ia32_rangeps512_mask.  The plain form supplies a zero vector
   and an all-ones __mmask16, the mask form W and U, the maskz form a zero
   vector and U.  */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) (__m512) (W), (__mmask16) (U), (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))
|
|
2554
|
|
/* Macro form of _mm512_insertf64x2: forwards X, the 128-bit Y and the
   immediate C to __builtin_ia32_insertf64x2_512_mask with an all-ones
   __mmask8.  The pass-through operand is a zero vector, as in the inline
   function of the same name, rather than a second expansion of X: naming
   X twice would evaluate the caller's argument expression (and any side
   effects in it) twice.  */
#define _mm512_insertf64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) -1))
|
|
2559
|
|
/* Macro forms of the masked f64x2 insert intrinsics.  Both forward X, the
   128-bit Y and the immediate C to __builtin_ia32_insertf64x2_512_mask
   with U as the mask operand; the mask form passes W as the fourth
   operand, the maskz form a zero vector.  */
#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
     (__v8df) (__m512d) (W), (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
|
|
2569
|
|
/* Macro form of _mm512_inserti64x2: forwards X, the 128-bit Y and the
   immediate C to __builtin_ia32_inserti64x2_512_mask with an all-ones
   __mmask8.  The pass-through operand is a zero vector, as in the inline
   function of the same name, rather than a second expansion of X: naming
   X twice would evaluate the caller's argument expression (and any side
   effects in it) twice.  */
#define _mm512_inserti64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
     (__v8di) _mm512_setzero_si512 (), (__mmask8) -1))
|
|
2573
|
|
/* Macro forms of the masked i64x2 insert intrinsics.  Both forward X, the
   128-bit Y and the immediate C to __builtin_ia32_inserti64x2_512_mask
   with U as the mask operand; the mask form passes W as the fourth
   operand, the maskz form a zero vector.  */
#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
     (__v8di) (__m512i) (W), (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
|
|
2583
|
|
/* Macro forms of the f32x8 insert intrinsics.  Each forwards X, the
   256-bit Y and the immediate C to __builtin_ia32_insertf32x8_mask.  The
   plain form supplies a zero vector and an all-ones __mmask16, the mask
   form W and U, the maskz form a zero vector and U.  */
#define _mm512_insertf32x8(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), (__mmask16) -1))

#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
     (__v16sf) (__m512) (W), (__mmask16) (U)))

#define _mm512_maskz_insertf32x8(U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), (__mmask16) (U)))
|
|
2601
|
|
2602 #define _mm512_inserti32x8(X, Y, C) \
|
|
2603 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
|
|
2604 (__v8si)(__m256i) (Y), (int) (C),\
|
|
2605 (__v16si)(__m512i)_mm512_setzero_si512 (),\
|
|
2606 (__mmask16)-1))
|
|
2607
|
|
/* Macro form of _mm512_mask_inserti32x8.  Forwards to
   __builtin_ia32_inserti32x8_mask with X cast to __v16si, Y cast to
   __v8si, C cast to int, W cast to __v16si as the fourth operand, and
   U cast to __mmask16.  */
#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
                                    (__v8si)(__m256i) (Y), \
                                    (int) (C), \
                                    (__v16si)(__m512i)(W), \
                                    (__mmask16)(U)))
|
|
2613
|
|
/* Macro form of _mm512_maskz_inserti32x8.  Forwards to
   __builtin_ia32_inserti32x8_mask with X cast to __v16si, Y cast to
   __v8si, C cast to int, a zero vector obtained from
   _mm512_setzero_si512 () as the fourth operand, and U cast to
   __mmask16.  */
#define _mm512_maskz_inserti32x8(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
                                    (__v8si)(__m256i) (Y), \
                                    (int) (C), \
                                    (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                    (__mmask16)(U)))
|
|
2619
|
|
/* Macro form of _mm_fpclass_ss_mask.  Forwards to
   __builtin_ia32_fpclassss with X cast to __v4sf and C cast to int,
   and casts the result to __mmask8.

   The final line of the definition deliberately carries no trailing
   backslash, so the macro body ends here and does not depend on the
   following line being blank.  */
#define _mm_fpclass_ss_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))
|
|
2622
|
|
/* Macro form of _mm_fpclass_sd_mask.  Forwards to
   __builtin_ia32_fpclasssd with X cast to __v2df and C cast to int,
   and casts the result to __mmask8.

   The final line of the definition deliberately carries no trailing
   backslash, so the macro body ends here and does not depend on the
   following line being blank.  */
#define _mm_fpclass_sd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
|
|
2625
|
|
/* Macro form of _mm512_mask_fpclass_pd_mask.  Forwards to
   __builtin_ia32_fpclasspd512_mask with X cast to __v8df, C cast to
   int and u cast to __mmask8; the result is cast to __mmask8.  */
#define _mm512_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
                                     (int) (C), \
                                     (__mmask8)(u)))
|
|
2629
|
|
/* Macro form of _mm512_mask_fpclass_ps_mask.  Forwards to
   __builtin_ia32_fpclassps512_mask with x cast to __v16sf, c cast to
   int and u as the mask operand; the result is cast to __mmask16.

   The mask operand is cast to __mmask16, matching the sixteen-element
   __v16sf operand and the __mmask16 result.  Casting it to __mmask8
   would discard the upper eight bits of the caller's mask.  */
#define _mm512_mask_fpclass_ps_mask(u, x, c) \
  ((__mmask16) \
   __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), \
                                     (int) (c), \
                                     (__mmask16)(u)))
|
|
2633
|
|
/* Macro form of _mm512_fpclass_pd_mask.  Forwards to
   __builtin_ia32_fpclasspd512_mask with X cast to __v8df, C cast to
   int and an all-ones __mmask8; the result is cast to __mmask8.  */
#define _mm512_fpclass_pd_mask(X, C) \
  ((__mmask8) \
   __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
                                     (int) (C), \
                                     (__mmask8)-1))
|
|
2637
|
|
/* Macro form of _mm512_fpclass_ps_mask.  Forwards to
   __builtin_ia32_fpclassps512_mask with x cast to __v16sf, c cast to
   int and an all-ones mask; the result is cast to __mmask16.

   The all-ones mask is built as (__mmask16)-1 so that it covers all
   sixteen elements of the __v16sf operand.  (__mmask8)-1 would set
   only the low eight mask bits and leave the upper eight elements
   masked off.  */
#define _mm512_fpclass_ps_mask(x, c) \
  ((__mmask16) \
   __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), \
                                     (int) (c), \
                                     (__mmask16)-1))
|
|
2641
|
|
/* Macro form of _mm_reduce_sd.  Forwards to
   __builtin_ia32_reducesd_mask with A and B cast to __v2df, C cast to
   int, a zero vector obtained from _mm_setzero_pd () as the fourth
   operand, and an all-ones __mmask8.  */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d) \
   __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
                                 (__v2df)(__m128d)(B), \
                                 (int)(C), \
                                 (__v2df) _mm_setzero_pd (), \
                                 (__mmask8)-1))
|
|
2646
|
|
/* Macro form of _mm_mask_reduce_sd.  Forwards to
   __builtin_ia32_reducesd_mask with A and B cast to __v2df, C cast to
   int, W cast to __v2df as the fourth operand, and U cast to
   __mmask8.  */
#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
                                 (__v2df)(__m128d)(B), \
                                 (int)(C), \
                                 (__v2df)(__m128d)(W), \
                                 (__mmask8)(U)))
|
|
2650
|
|
/* Macro form of _mm_maskz_reduce_sd.  Forwards to
   __builtin_ia32_reducesd_mask with A and B cast to __v2df, C cast to
   int, a zero vector obtained from _mm_setzero_pd () as the fourth
   operand, and U cast to __mmask8.  */
#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
                                 (__v2df)(__m128d)(B), \
                                 (int)(C), \
                                 (__v2df) _mm_setzero_pd (), \
                                 (__mmask8)(U)))
|
|
2655
|
|
/* Macro form of _mm_reduce_ss.  Forwards to
   __builtin_ia32_reducess_mask with A and B cast to __v4sf, C cast to
   int, a zero vector obtained from _mm_setzero_ps () as the fourth
   operand, and an all-ones __mmask8.  */
#define _mm_reduce_ss(A, B, C) \
  ((__m128) \
   __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
                                 (__v4sf)(__m128)(B), \
                                 (int)(C), \
                                 (__v4sf) _mm_setzero_ps (), \
                                 (__mmask8)-1))
|
|
2660
|
|
/* Macro form of _mm_mask_reduce_ss.  Forwards to
   __builtin_ia32_reducess_mask with A and B cast to __v4sf, C cast to
   int, W cast to __v4sf as the fourth operand, and U cast to
   __mmask8.  */
#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128) \
   __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
                                 (__v4sf)(__m128)(B), \
                                 (int)(C), \
                                 (__v4sf)(__m128)(W), \
                                 (__mmask8)(U)))
|
|
2664
|
|
/* Macro form of _mm_maskz_reduce_ss.  Forwards to
   __builtin_ia32_reducess_mask with A and B cast to __v4sf, C cast to
   int, a zero vector obtained from _mm_setzero_ps () as the fourth
   operand, and U cast to __mmask8.  */
#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
                                 (__v4sf)(__m128)(B), \
                                 (int)(C), \
                                 (__v4sf) _mm_setzero_ps (), \
                                 (__mmask8)(U)))
|
|
2669
|
|
2670
|
|
2671
|
|
2672 #endif
|
|
2673
|
|
2674 #ifdef __DISABLE_AVX512DQ__
|
|
2675 #undef __DISABLE_AVX512DQ__
|
|
2676 #pragma GCC pop_options
|
|
2677 #endif /* __DISABLE_AVX512DQ__ */
|
|
2678
|
|
2679 #endif /* _AVX512DQINTRIN_H_INCLUDED */
|