111
|
1 /* Copyright (C) 2014-2017 Free Software Foundation, Inc.
|
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify
|
|
6 it under the terms of the GNU General Public License as published by
|
|
7 the Free Software Foundation; either version 3, or (at your option)
|
|
8 any later version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful,
|
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 GNU General Public License for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
24 #ifndef _IMMINTRIN_H_INCLUDED
|
|
25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
|
|
26 #endif
|
|
27
|
|
28 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
|
|
29 #define _AVX512VLDQINTRIN_H_INCLUDED
|
|
30
|
|
31 #if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
|
|
32 #pragma GCC push_options
|
|
33 #pragma GCC target("avx512vl,avx512dq")
|
|
34 #define __DISABLE_AVX512VLDQ__
|
|
35 #endif /* __AVX512VLDQ__ */
|
|
36
|
|
/* Convert, with truncation (VCVTTPD2QQ), the four double-precision
   elements of __A to signed 64-bit integers.  All mask bits are set
   (-1), so the zero-vector passthrough operand is never selected.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) -1);
}
|
|
46
|
|
/* Merge-masked form of _mm256_cvttpd_epi64: element i is the truncated
   conversion of __A[i] when bit i of __U is set, otherwise it is taken
   from the passthrough vector __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di) __W,
						     (__mmask8) __U);
}
|
|
55
|
|
/* Zero-masked form of _mm256_cvttpd_epi64: element i is the truncated
   conversion of __A[i] when bit i of __U is set, otherwise zero (the
   passthrough operand is an all-zero vector).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}
|
|
65
|
|
/* 128-bit variant: convert, with truncation (VCVTTPD2QQ), the two
   double-precision elements of __A to signed 64-bit integers.  Full
   mask (-1) means the zero passthrough is never used.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}
|
|
75
|
|
76 extern __inline __m128i
|
|
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
78 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
|
|
79 {
|
|
80 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
|
|
81 (__v2di) __W,
|
|
82 (__mmask8) __U);
|
|
83 }
|
|
84
|
|
85 extern __inline __m128i
|
|
86 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
87 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
|
|
88 {
|
|
89 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
|
|
90 (__v2di)
|
|
91 _mm_setzero_si128 (),
|
|
92 (__mmask8) __U);
|
|
93 }
|
|
94
|
|
95 extern __inline __m256i
|
|
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
97 _mm256_cvttpd_epu64 (__m256d __A)
|
|
98 {
|
|
99 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
|
|
100 (__v4di)
|
|
101 _mm256_setzero_si256 (),
|
|
102 (__mmask8) -1);
|
|
103 }
|
|
104
|
|
105 extern __inline __m256i
|
|
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
107 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
|
|
108 {
|
|
109 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
|
|
110 (__v4di) __W,
|
|
111 (__mmask8) __U);
|
|
112 }
|
|
113
|
|
114 extern __inline __m256i
|
|
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
116 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
|
|
117 {
|
|
118 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
|
|
119 (__v4di)
|
|
120 _mm256_setzero_si256 (),
|
|
121 (__mmask8) __U);
|
|
122 }
|
|
123
|
|
124 extern __inline __m128i
|
|
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
126 _mm_cvttpd_epu64 (__m128d __A)
|
|
127 {
|
|
128 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
|
|
129 (__v2di)
|
|
130 _mm_setzero_si128 (),
|
|
131 (__mmask8) -1);
|
|
132 }
|
|
133
|
|
134 extern __inline __m128i
|
|
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
136 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
|
|
137 {
|
|
138 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
|
|
139 (__v2di) __W,
|
|
140 (__mmask8) __U);
|
|
141 }
|
|
142
|
|
143 extern __inline __m128i
|
|
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
145 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
|
|
146 {
|
|
147 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
|
|
148 (__v2di)
|
|
149 _mm_setzero_si128 (),
|
|
150 (__mmask8) __U);
|
|
151 }
|
|
152
|
|
153 extern __inline __m256i
|
|
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
155 _mm256_cvtpd_epi64 (__m256d __A)
|
|
156 {
|
|
157 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
|
|
158 (__v4di)
|
|
159 _mm256_setzero_si256 (),
|
|
160 (__mmask8) -1);
|
|
161 }
|
|
162
|
|
163 extern __inline __m256i
|
|
164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
165 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
|
|
166 {
|
|
167 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
|
|
168 (__v4di) __W,
|
|
169 (__mmask8) __U);
|
|
170 }
|
|
171
|
|
172 extern __inline __m256i
|
|
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
174 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
|
|
175 {
|
|
176 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
|
|
177 (__v4di)
|
|
178 _mm256_setzero_si256 (),
|
|
179 (__mmask8) __U);
|
|
180 }
|
|
181
|
|
182 extern __inline __m128i
|
|
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
184 _mm_cvtpd_epi64 (__m128d __A)
|
|
185 {
|
|
186 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
|
|
187 (__v2di)
|
|
188 _mm_setzero_si128 (),
|
|
189 (__mmask8) -1);
|
|
190 }
|
|
191
|
|
192 extern __inline __m128i
|
|
193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
194 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
|
|
195 {
|
|
196 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
|
|
197 (__v2di) __W,
|
|
198 (__mmask8) __U);
|
|
199 }
|
|
200
|
|
201 extern __inline __m128i
|
|
202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
203 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
|
|
204 {
|
|
205 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
|
|
206 (__v2di)
|
|
207 _mm_setzero_si128 (),
|
|
208 (__mmask8) __U);
|
|
209 }
|
|
210
|
|
211 extern __inline __m256i
|
|
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
213 _mm256_cvtpd_epu64 (__m256d __A)
|
|
214 {
|
|
215 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
|
|
216 (__v4di)
|
|
217 _mm256_setzero_si256 (),
|
|
218 (__mmask8) -1);
|
|
219 }
|
|
220
|
|
221 extern __inline __m256i
|
|
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
223 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
|
|
224 {
|
|
225 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
|
|
226 (__v4di) __W,
|
|
227 (__mmask8) __U);
|
|
228 }
|
|
229
|
|
230 extern __inline __m256i
|
|
231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
232 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
|
|
233 {
|
|
234 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
|
|
235 (__v4di)
|
|
236 _mm256_setzero_si256 (),
|
|
237 (__mmask8) __U);
|
|
238 }
|
|
239
|
|
240 extern __inline __m128i
|
|
241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
242 _mm_cvtpd_epu64 (__m128d __A)
|
|
243 {
|
|
244 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
|
|
245 (__v2di)
|
|
246 _mm_setzero_si128 (),
|
|
247 (__mmask8) -1);
|
|
248 }
|
|
249
|
|
250 extern __inline __m128i
|
|
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
252 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
|
|
253 {
|
|
254 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
|
|
255 (__v2di) __W,
|
|
256 (__mmask8) __U);
|
|
257 }
|
|
258
|
|
259 extern __inline __m128i
|
|
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
261 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
|
|
262 {
|
|
263 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
|
|
264 (__v2di)
|
|
265 _mm_setzero_si128 (),
|
|
266 (__mmask8) __U);
|
|
267 }
|
|
268
|
|
269 extern __inline __m256i
|
|
270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
271 _mm256_cvttps_epi64 (__m128 __A)
|
|
272 {
|
|
273 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
|
|
274 (__v4di)
|
|
275 _mm256_setzero_si256 (),
|
|
276 (__mmask8) -1);
|
|
277 }
|
|
278
|
|
279 extern __inline __m256i
|
|
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
281 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
|
|
282 {
|
|
283 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
|
|
284 (__v4di) __W,
|
|
285 (__mmask8) __U);
|
|
286 }
|
|
287
|
|
288 extern __inline __m256i
|
|
289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
290 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
|
|
291 {
|
|
292 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
|
|
293 (__v4di)
|
|
294 _mm256_setzero_si256 (),
|
|
295 (__mmask8) __U);
|
|
296 }
|
|
297
|
|
298 extern __inline __m128i
|
|
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
300 _mm_cvttps_epi64 (__m128 __A)
|
|
301 {
|
|
302 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
|
|
303 (__v2di)
|
|
304 _mm_setzero_si128 (),
|
|
305 (__mmask8) -1);
|
|
306 }
|
|
307
|
|
308 extern __inline __m128i
|
|
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
310 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
|
|
311 {
|
|
312 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
|
|
313 (__v2di) __W,
|
|
314 (__mmask8) __U);
|
|
315 }
|
|
316
|
|
317 extern __inline __m128i
|
|
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
319 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
|
|
320 {
|
|
321 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
|
|
322 (__v2di)
|
|
323 _mm_setzero_si128 (),
|
|
324 (__mmask8) __U);
|
|
325 }
|
|
326
|
|
327 extern __inline __m256i
|
|
328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
329 _mm256_cvttps_epu64 (__m128 __A)
|
|
330 {
|
|
331 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
|
|
332 (__v4di)
|
|
333 _mm256_setzero_si256 (),
|
|
334 (__mmask8) -1);
|
|
335 }
|
|
336
|
|
337 extern __inline __m256i
|
|
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
339 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
|
|
340 {
|
|
341 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
|
|
342 (__v4di) __W,
|
|
343 (__mmask8) __U);
|
|
344 }
|
|
345
|
|
346 extern __inline __m256i
|
|
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
348 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
|
|
349 {
|
|
350 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
|
|
351 (__v4di)
|
|
352 _mm256_setzero_si256 (),
|
|
353 (__mmask8) __U);
|
|
354 }
|
|
355
|
|
356 extern __inline __m128i
|
|
357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
358 _mm_cvttps_epu64 (__m128 __A)
|
|
359 {
|
|
360 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
|
|
361 (__v2di)
|
|
362 _mm_setzero_si128 (),
|
|
363 (__mmask8) -1);
|
|
364 }
|
|
365
|
|
366 extern __inline __m128i
|
|
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
368 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
|
|
369 {
|
|
370 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
|
|
371 (__v2di) __W,
|
|
372 (__mmask8) __U);
|
|
373 }
|
|
374
|
|
375 extern __inline __m128i
|
|
376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
377 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
|
|
378 {
|
|
379 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
|
|
380 (__v2di)
|
|
381 _mm_setzero_si128 (),
|
|
382 (__mmask8) __U);
|
|
383 }
|
|
384
|
|
/* Broadcast the 128-bit pair of doubles in __A to both 128-bit halves
   of the 256-bit result (VBROADCASTF64X2).  The full mask (-1) makes
   the undefined passthrough operand irrelevant.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f64x2 (__m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
							   __A,
							   (__v4df)_mm256_undefined_pd(),
							   (__mmask8) -1);
}
|
|
394
|
|
/* Merge-masked broadcast of the double pair in __A: element i comes
   from the broadcast when bit i of __M is set, else from __O.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
							   __A,
							   (__v4df)
							   __O, __M);
}
|
|
404
|
|
405 extern __inline __m256d
|
|
406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
407 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
|
|
408 {
|
|
409 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
|
|
410 __A,
|
|
411 (__v4df)
|
|
412 _mm256_setzero_ps (),
|
|
413 __M);
|
|
414 }
|
|
415
|
|
416 extern __inline __m256i
|
|
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
418 _mm256_broadcast_i64x2 (__m128i __A)
|
|
419 {
|
|
420 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
|
|
421 __A,
|
|
422 (__v4di)_mm256_undefined_si256(),
|
|
423 (__mmask8) -1);
|
|
424 }
|
|
425
|
|
426 extern __inline __m256i
|
|
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
428 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
|
|
429 {
|
|
430 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
|
|
431 __A,
|
|
432 (__v4di)
|
|
433 __O, __M);
|
|
434 }
|
|
435
|
|
436 extern __inline __m256i
|
|
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
438 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
|
|
439 {
|
|
440 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
|
|
441 __A,
|
|
442 (__v4di)
|
|
443 _mm256_setzero_si256 (),
|
|
444 __M);
|
|
445 }
|
|
446
|
|
/* Broadcast the low pair of single-precision elements of __A across
   all four 64-bit lanes of the 256-bit result (VBROADCASTF32X2).
   Full mask (-1), so the undefined passthrough is never selected.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f32x2 (__m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
							  (__v8sf)_mm256_undefined_ps(),
							  (__mmask8) -1);
}
|
|
455
|
|
456 extern __inline __m256
|
|
457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
458 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
|
|
459 {
|
|
460 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
|
|
461 (__v8sf) __O,
|
|
462 __M);
|
|
463 }
|
|
464
|
|
465 extern __inline __m256
|
|
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
467 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
|
|
468 {
|
|
469 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
|
|
470 (__v8sf)
|
|
471 _mm256_setzero_ps (),
|
|
472 __M);
|
|
473 }
|
|
474
|
|
475 extern __inline __m256i
|
|
476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
477 _mm256_broadcast_i32x2 (__m128i __A)
|
|
478 {
|
|
479 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
|
|
480 __A,
|
|
481 (__v8si)_mm256_undefined_si256(),
|
|
482 (__mmask8) -1);
|
|
483 }
|
|
484
|
|
485 extern __inline __m256i
|
|
486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
487 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
|
|
488 {
|
|
489 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
|
|
490 __A,
|
|
491 (__v8si)
|
|
492 __O, __M);
|
|
493 }
|
|
494
|
|
495 extern __inline __m256i
|
|
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
497 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
|
|
498 {
|
|
499 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
|
|
500 __A,
|
|
501 (__v8si)
|
|
502 _mm256_setzero_si256 (),
|
|
503 __M);
|
|
504 }
|
|
505
|
|
506 extern __inline __m128i
|
|
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
508 _mm_broadcast_i32x2 (__m128i __A)
|
|
509 {
|
|
510 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
|
|
511 __A,
|
|
512 (__v4si)_mm_undefined_si128(),
|
|
513 (__mmask8) -1);
|
|
514 }
|
|
515
|
|
516 extern __inline __m128i
|
|
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
518 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
|
|
519 {
|
|
520 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
|
|
521 __A,
|
|
522 (__v4si)
|
|
523 __O, __M);
|
|
524 }
|
|
525
|
|
526 extern __inline __m128i
|
|
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
528 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
|
|
529 {
|
|
530 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
|
|
531 __A,
|
|
532 (__v4si)
|
|
533 _mm_setzero_si128 (),
|
|
534 __M);
|
|
535 }
|
|
536
|
|
/* Multiply the packed 64-bit integers of __A and __B, keeping the low
   64 bits of each product.  Implemented with GNU vector arithmetic on
   the unsigned element type (__v4du) so wraparound on overflow is
   well defined (signed overflow would be UB).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du) __A * (__v4du) __B);
}
|
|
543
|
|
544 extern __inline __m256i
|
|
545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
546 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
|
|
547 __m256i __B)
|
|
548 {
|
|
549 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
|
|
550 (__v4di) __B,
|
|
551 (__v4di) __W,
|
|
552 (__mmask8) __U);
|
|
553 }
|
|
554
|
|
555 extern __inline __m256i
|
|
556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
557 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
|
|
558 {
|
|
559 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
|
|
560 (__v4di) __B,
|
|
561 (__v4di)
|
|
562 _mm256_setzero_si256 (),
|
|
563 (__mmask8) __U);
|
|
564 }
|
|
565
|
|
/* 128-bit variant: low 64 bits of each 64-bit product of __A and __B.
   Unsigned element type (__v2du) keeps overflow well defined.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du) __A * (__v2du) __B);
}
|
|
572
|
|
573 extern __inline __m128i
|
|
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
575 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
|
|
576 __m128i __B)
|
|
577 {
|
|
578 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
|
|
579 (__v2di) __B,
|
|
580 (__v2di) __W,
|
|
581 (__mmask8) __U);
|
|
582 }
|
|
583
|
|
584 extern __inline __m128i
|
|
585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
586 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
|
|
587 {
|
|
588 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
|
|
589 (__v2di) __B,
|
|
590 (__v2di)
|
|
591 _mm_setzero_si128 (),
|
|
592 (__mmask8) __U);
|
|
593 }
|
|
594
|
|
/* Bitwise ANDNOT of double-precision vectors (VANDNPD computes
   ~__A & __B), merge-masked: element i is the result when bit i of
   __U is set, otherwise taken from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
		       __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
						  (__v4df) __B,
						  (__v4df) __W,
						  (__mmask8) __U);
}
|
|
605
|
|
/* Bitwise ANDNOT of double-precision vectors (~__A & __B),
   zero-masked: element i is the result when bit i of __U is set,
   otherwise zero (note the correctly typed _mm256_setzero_pd
   passthrough).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
						  (__v4df) __B,
						  (__v4df)
						  _mm256_setzero_pd (),
						  (__mmask8) __U);
}
|
|
616
|
|
617 extern __inline __m128d
|
|
618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
619 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
|
|
620 __m128d __B)
|
|
621 {
|
|
622 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
|
|
623 (__v2df) __B,
|
|
624 (__v2df) __W,
|
|
625 (__mmask8) __U);
|
|
626 }
|
|
627
|
|
628 extern __inline __m128d
|
|
629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
630 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
|
|
631 {
|
|
632 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
|
|
633 (__v2df) __B,
|
|
634 (__v2df)
|
|
635 _mm_setzero_pd (),
|
|
636 (__mmask8) __U);
|
|
637 }
|
|
638
|
|
639 extern __inline __m256
|
|
640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
641 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
|
|
642 __m256 __B)
|
|
643 {
|
|
644 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
|
|
645 (__v8sf) __B,
|
|
646 (__v8sf) __W,
|
|
647 (__mmask8) __U);
|
|
648 }
|
|
649
|
|
650 extern __inline __m256
|
|
651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
652 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
|
|
653 {
|
|
654 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
|
|
655 (__v8sf) __B,
|
|
656 (__v8sf)
|
|
657 _mm256_setzero_ps (),
|
|
658 (__mmask8) __U);
|
|
659 }
|
|
660
|
|
661 extern __inline __m128
|
|
662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
663 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
|
|
664 {
|
|
665 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
|
|
666 (__v4sf) __B,
|
|
667 (__v4sf) __W,
|
|
668 (__mmask8) __U);
|
|
669 }
|
|
670
|
|
671 extern __inline __m128
|
|
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
673 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
|
|
674 {
|
|
675 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
|
|
676 (__v4sf) __B,
|
|
677 (__v4sf)
|
|
678 _mm_setzero_ps (),
|
|
679 (__mmask8) __U);
|
|
680 }
|
|
681
|
|
682 extern __inline __m256i
|
|
683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
684 _mm256_cvtps_epi64 (__m128 __A)
|
|
685 {
|
|
686 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
|
|
687 (__v4di)
|
|
688 _mm256_setzero_si256 (),
|
|
689 (__mmask8) -1);
|
|
690 }
|
|
691
|
|
692 extern __inline __m256i
|
|
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
694 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
|
|
695 {
|
|
696 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
|
|
697 (__v4di) __W,
|
|
698 (__mmask8) __U);
|
|
699 }
|
|
700
|
|
701 extern __inline __m256i
|
|
702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
703 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
|
|
704 {
|
|
705 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
|
|
706 (__v4di)
|
|
707 _mm256_setzero_si256 (),
|
|
708 (__mmask8) __U);
|
|
709 }
|
|
710
|
|
711 extern __inline __m128i
|
|
712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
713 _mm_cvtps_epi64 (__m128 __A)
|
|
714 {
|
|
715 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
|
|
716 (__v2di)
|
|
717 _mm_setzero_si128 (),
|
|
718 (__mmask8) -1);
|
|
719 }
|
|
720
|
|
721 extern __inline __m128i
|
|
722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
723 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
|
|
724 {
|
|
725 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
|
|
726 (__v2di) __W,
|
|
727 (__mmask8) __U);
|
|
728 }
|
|
729
|
|
730 extern __inline __m128i
|
|
731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
732 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
|
|
733 {
|
|
734 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
|
|
735 (__v2di)
|
|
736 _mm_setzero_si128 (),
|
|
737 (__mmask8) __U);
|
|
738 }
|
|
739
|
|
740 extern __inline __m256i
|
|
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
742 _mm256_cvtps_epu64 (__m128 __A)
|
|
743 {
|
|
744 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
|
|
745 (__v4di)
|
|
746 _mm256_setzero_si256 (),
|
|
747 (__mmask8) -1);
|
|
748 }
|
|
749
|
|
750 extern __inline __m256i
|
|
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
752 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
|
|
753 {
|
|
754 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
|
|
755 (__v4di) __W,
|
|
756 (__mmask8) __U);
|
|
757 }
|
|
758
|
|
759 extern __inline __m256i
|
|
760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
761 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
|
|
762 {
|
|
763 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
|
|
764 (__v4di)
|
|
765 _mm256_setzero_si256 (),
|
|
766 (__mmask8) __U);
|
|
767 }
|
|
768
|
|
769 extern __inline __m128i
|
|
770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
771 _mm_cvtps_epu64 (__m128 __A)
|
|
772 {
|
|
773 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
|
|
774 (__v2di)
|
|
775 _mm_setzero_si128 (),
|
|
776 (__mmask8) -1);
|
|
777 }
|
|
778
|
|
779 extern __inline __m128i
|
|
780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
781 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
|
|
782 {
|
|
783 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
|
|
784 (__v2di) __W,
|
|
785 (__mmask8) __U);
|
|
786 }
|
|
787
|
|
788 extern __inline __m128i
|
|
789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
790 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
|
|
791 {
|
|
792 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
|
|
793 (__v2di)
|
|
794 _mm_setzero_si128 (),
|
|
795 (__mmask8) __U);
|
|
796 }
|
|
797
|
|
798 extern __inline __m128
|
|
799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
800 _mm256_cvtepi64_ps (__m256i __A)
|
|
801 {
|
|
802 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
|
|
803 (__v4sf)
|
|
804 _mm_setzero_ps (),
|
|
805 (__mmask8) -1);
|
|
806 }
|
|
807
|
|
808 extern __inline __m128
|
|
809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
810 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
|
|
811 {
|
|
812 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
|
|
813 (__v4sf) __W,
|
|
814 (__mmask8) __U);
|
|
815 }
|
|
816
|
|
817 extern __inline __m128
|
|
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
819 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
|
|
820 {
|
|
821 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
|
|
822 (__v4sf)
|
|
823 _mm_setzero_ps (),
|
|
824 (__mmask8) __U);
|
|
825 }
|
|
826
|
|
827 extern __inline __m128
|
|
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
829 _mm_cvtepi64_ps (__m128i __A)
|
|
830 {
|
|
831 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
|
|
832 (__v4sf)
|
|
833 _mm_setzero_ps (),
|
|
834 (__mmask8) -1);
|
|
835 }
|
|
836
|
|
837 extern __inline __m128
|
|
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
839 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
|
|
840 {
|
|
841 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
|
|
842 (__v4sf) __W,
|
|
843 (__mmask8) __U);
|
|
844 }
|
|
845
|
|
846 extern __inline __m128
|
|
847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
848 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
|
|
849 {
|
|
850 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
|
|
851 (__v4sf)
|
|
852 _mm_setzero_ps (),
|
|
853 (__mmask8) __U);
|
|
854 }
|
|
855
|
|
856 extern __inline __m128
|
|
857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
858 _mm256_cvtepu64_ps (__m256i __A)
|
|
859 {
|
|
860 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
|
|
861 (__v4sf)
|
|
862 _mm_setzero_ps (),
|
|
863 (__mmask8) -1);
|
|
864 }
|
|
865
|
|
866 extern __inline __m128
|
|
867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
868 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
|
|
869 {
|
|
870 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
|
|
871 (__v4sf) __W,
|
|
872 (__mmask8) __U);
|
|
873 }
|
|
874
|
|
875 extern __inline __m128
|
|
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
877 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
|
|
878 {
|
|
879 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
|
|
880 (__v4sf)
|
|
881 _mm_setzero_ps (),
|
|
882 (__mmask8) __U);
|
|
883 }
|
|
884
|
|
885 extern __inline __m128
|
|
886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
887 _mm_cvtepu64_ps (__m128i __A)
|
|
888 {
|
|
889 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
|
|
890 (__v4sf)
|
|
891 _mm_setzero_ps (),
|
|
892 (__mmask8) -1);
|
|
893 }
|
|
894
|
|
895 extern __inline __m128
|
|
896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
897 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
|
|
898 {
|
|
899 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
|
|
900 (__v4sf) __W,
|
|
901 (__mmask8) __U);
|
|
902 }
|
|
903
|
|
904 extern __inline __m128
|
|
905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
|
906 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
|
|
907 {
|
|
908 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
|
|
909 (__v4sf)
|
|
910 _mm_setzero_ps (),
|
|
911 (__mmask8) __U);
|
|
912 }
|
|
913
|
|
/* Convert four packed signed 64-bit integers in __A to packed doubles
   (VCVTQQ2PD, 256-bit form).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masking form: elements whose bit in __U is clear come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df) __W,
						    (__mmask8) __U);
}

/* Zero-masking form: elements not selected by __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

/* Convert two packed signed 64-bit integers in __A to packed doubles
   (VCVTQQ2PD, 128-bit form).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masking form of the 128-bit conversion.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df) __W,
						    (__mmask8) __U);
}

/* Zero-masking form of the 128-bit conversion.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}
|
|
971
|
|
/* Convert four packed unsigned 64-bit integers in __A to packed doubles
   (VCVTUQQ2PD, 256-bit form).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) -1);
}

/* Merge-masking form: elements whose bit in __U is clear come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df) __W,
						     (__mmask8) __U);
}

/* Zero-masking form: elements not selected by __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}
|
|
1000
|
|
/* Bitwise AND of packed doubles __A & __B (VANDPD); result elements
   whose bit in the write-mask __U is clear are taken from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

/* Zero-masking AND: elements not selected by __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

/* 128-bit merge-masking AND of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

/* 128-bit zero-masking AND of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

/* 256-bit merge-masking AND of packed floats (VANDPS).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

/* 256-bit zero-masking AND of packed floats.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

/* 128-bit merge-masking AND of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

/* 128-bit zero-masking AND of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}
|
|
1085
|
|
/* Convert two packed unsigned 64-bit integers in __A to packed doubles
   (VCVTUQQ2PD, 128-bit form).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) -1);
}

/* Merge-masking form: elements whose bit in __U is clear come from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df) __W,
						     (__mmask8) __U);
}

/* Zero-masking form: elements not selected by __U are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) __U);
}
|
|
1114
|
|
/* Bitwise XOR of packed doubles __A ^ __B (VXORPD); result elements
   whose bit in the write-mask __U is clear are taken from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

/* Zero-masking XOR: elements not selected by __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

/* 128-bit merge-masking XOR of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

/* 128-bit zero-masking XOR of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

/* 256-bit merge-masking XOR of packed floats (VXORPS).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

/* 256-bit zero-masking XOR of packed floats.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

/* 128-bit merge-masking XOR of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

/* 128-bit zero-masking XOR of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}
|
|
1199
|
|
/* Bitwise OR of packed doubles __A | __B (VORPD); result elements
   whose bit in the write-mask __U is clear are taken from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df) __W,
						(__mmask8) __U);
}

/* Zero-masking OR: elements not selected by __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df)
						_mm256_setzero_pd (),
						(__mmask8) __U);
}

/* 128-bit merge-masking OR of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df) __W,
						(__mmask8) __U);
}

/* 128-bit zero-masking OR of packed doubles.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df)
						_mm_setzero_pd (),
						(__mmask8) __U);
}

/* 256-bit merge-masking OR of packed floats (VORPS).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf) __W,
					       (__mmask8) __U);
}

/* 256-bit zero-masking OR of packed floats.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf)
					       _mm256_setzero_ps (),
					       (__mmask8) __U);
}

/* 128-bit merge-masking OR of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf) __W,
					       (__mmask8) __U);
}

/* 128-bit zero-masking OR of packed floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf)
					       _mm_setzero_ps (),
					       (__mmask8) __U);
}
|
|
1283
|
|
/* Expand mask __A into a vector of 32-bit elements (VPMOVM2D,
   128-bit form): each element is all-ones or all-zeros per mask bit.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

/* 256-bit form of the mask-to-dword expansion.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

/* Expand mask __A into a vector of 64-bit elements (VPMOVM2Q,
   128-bit form).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

/* 256-bit form of the mask-to-qword expansion.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}
|
|
1311
|
|
/* Collect the sign bits of the 32-bit elements of __A into a mask
   (VPMOVD2M, 128-bit form).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

/* 256-bit form of the dword-to-mask collection.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

/* Collect the sign bits of the 64-bit elements of __A into a mask
   (VPMOVQ2M, 128-bit form).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

/* 256-bit form of the qword-to-mask collection.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
|
|
1339
|
|
1340 #ifdef __OPTIMIZE__
|
|
/* Extract the 128-bit lane selected by the immediate __imm from the
   256-bit double vector __A (VEXTRACTF64X2).  __imm must be a compile
   time constant; this inline form is only available under __OPTIMIZE__.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf64x2_pd (__m256d __A, const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) -1);
}

/* Merge-masking extract: unselected result elements come from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df) __W,
							 (__mmask8)
							 __U);
}

/* Zero-masking extract: unselected result elements are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
			      const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8)
							 __U);
}

/* Integer counterpart (VEXTRACTI64X2): extract the 128-bit lane
   selected by __imm from the 256-bit integer vector __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_si128 (),
							 (__mmask8) -1);
}

/* Merge-masking integer extract.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di) __W,
							 (__mmask8)
							 __U);
}

/* Zero-masking integer extract.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
				 const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_si128 (),
							 (__mmask8)
							 __U);
}
|
|
1412
|
|
/* Reduction of packed doubles (VREDUCEPD, 256-bit form).  __B is the
   immediate control byte passed straight to the instruction; it must
   be a compile-time constant (hence the __OPTIMIZE__ guard).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pd (__m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masking form: unselected elements come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df) __W,
						    (__mmask8) __U);
}

/* Zero-masking form: unselected elements are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

/* 128-bit VREDUCEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_pd (__m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

/* 128-bit merge-masking VREDUCEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df) __W,
						    (__mmask8) __U);
}

/* 128-bit zero-masking VREDUCEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}

/* 256-bit VREDUCEPS on packed floats.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ps (__m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) -1);
}

/* 256-bit merge-masking VREDUCEPS.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf) __W,
						   (__mmask8) __U);
}

/* 256-bit zero-masking VREDUCEPS.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U);
}

/* 128-bit VREDUCEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ps (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* 128-bit merge-masking VREDUCEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* 128-bit zero-masking VREDUCEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}
|
|
1528
|
|
/* Range restriction of packed doubles (VRANGEPD, 256-bit form).  __C is
   the immediate control byte selecting the range operation; it must be a
   compile-time constant (hence the __OPTIMIZE__ guard).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_pd (__m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) -1);
}

/* Merge-masking form: unselected elements come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
		      __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df) __W,
						   (__mmask8) __U);
}

/* Zero-masking form: unselected elements are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) __U);
}

/* 128-bit VRANGEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_pd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) -1);
}

/* 128-bit merge-masking VRANGEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_pd (__m128d __W, __mmask8 __U,
		   __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df) __W,
						   (__mmask8) __U);
}

/* 128-bit zero-masking VRANGEPD.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) __U);
}

/* 256-bit VRANGEPS on packed floats.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_ps (__m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) -1);
}

/* 256-bit merge-masking VRANGEPS.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
		      int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf) __W,
						  (__mmask8) __U);
}

/* 256-bit zero-masking VRANGEPS.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) __U);
}

/* 128-bit VRANGEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ps (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) -1);
}

/* 128-bit merge-masking VRANGEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ps (__m128 __W, __mmask8 __U,
		   __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf) __W,
						  (__mmask8) __U);
}

/* 128-bit zero-masking VRANGEPS.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) __U);
}
|
|
1660
|
|
/* Classify packed doubles (VFPCLASSPD, 256-bit form) under write-mask
   __U; __imm is the immediate category-selection byte and must be a
   compile-time constant (hence the __OPTIMIZE__ guard).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm, __U);
}

/* Unmasked 256-bit VFPCLASSPD (all-ones write-mask).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Masked 256-bit VFPCLASSPS on packed floats.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm, __U);
}

/* Unmasked 256-bit VFPCLASSPS.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Masked 128-bit VFPCLASSPD.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm, __U);
}

/* Unmasked 128-bit VFPCLASSPD.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_pd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Masked 128-bit VFPCLASSPS.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
						      __imm, __U);
}

/* Unmasked 128-bit VFPCLASSPS.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ps_mask (__m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
						      __imm,
						      (__mmask8) -1);
}
|
|
1729
|
|
/* Insert the 128-bit integer vector __B into the lane of __A selected
   by the immediate __imm (VINSERTI64X2); __imm must be a compile-time
   constant (hence the __OPTIMIZE__ guard).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di)
							_mm256_setzero_si256 (),
							(__mmask8) -1);
}

/* Merge-masking insert: unselected result elements come from __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
			 __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di) __W,
							(__mmask8)
							__U);
}

/* Zero-masking insert: unselected result elements are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
			  const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di)
							_mm256_setzero_si256 (),
							(__mmask8)
							__U);
}

/* Floating-point counterpart (VINSERTF64X2): insert the 128-bit
   double vector __B into the lane of __A selected by __imm.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df)
							_mm256_setzero_pd (),
							(__mmask8) -1);
}

/* Merge-masking VINSERTF64X2.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
			 __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df) __W,
							(__mmask8)
							__U);
}

/* Zero-masking VINSERTF64X2.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
			  const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df)
							_mm256_setzero_pd (),
							(__mmask8)
							__U);
}
|
|
1807
|
|
1808 #else
|
|
1809 #define _mm256_insertf64x2(X, Y, C) \
|
|
1810 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
|
|
1811 (__v2df)(__m128d) (Y), (int) (C), \
|
|
1812 (__v4df)(__m256d)_mm256_setzero_pd(), \
|
|
1813 (__mmask8)-1))
|
|
1814
|
|
1815 #define _mm256_mask_insertf64x2(W, U, X, Y, C) \
|
|
1816 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
|
|
1817 (__v2df)(__m128d) (Y), (int) (C), \
|
|
1818 (__v4df)(__m256d)(W), \
|
|
1819 (__mmask8)(U)))
|
|
1820
|
|
1821 #define _mm256_maskz_insertf64x2(U, X, Y, C) \
|
|
1822 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
|
|
1823 (__v2df)(__m128d) (Y), (int) (C), \
|
|
1824 (__v4df)(__m256d)_mm256_setzero_pd(), \
|
|
1825 (__mmask8)(U)))
|
|
1826
|
|
1827 #define _mm256_inserti64x2(X, Y, C) \
|
|
1828 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
|
|
1829 (__v2di)(__m128i) (Y), (int) (C), \
|
|
1830 (__v4di)(__m256i)_mm256_setzero_si256 (), \
|
|
1831 (__mmask8)-1))
|
|
1832
|
|
1833 #define _mm256_mask_inserti64x2(W, U, X, Y, C) \
|
|
1834 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
|
|
1835 (__v2di)(__m128i) (Y), (int) (C), \
|
|
1836 (__v4di)(__m256i)(W), \
|
|
1837 (__mmask8)(U)))
|
|
1838
|
|
1839 #define _mm256_maskz_inserti64x2(U, X, Y, C) \
|
|
1840 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
|
|
1841 (__v2di)(__m128i) (Y), (int) (C), \
|
|
1842 (__v4di)(__m256i)_mm256_setzero_si256 (), \
|
|
1843 (__mmask8)(U)))
|
|
1844
|
|
/* Extract 128-bit lane C (0 or 1) of the double vector X; all-ones mask
   keeps both extracted elements.  */
#define _mm256_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

/* Merge-masking variant: elements with a clear bit in U come from W.  */
#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

/* Zero-masking variant: elements with a clear bit in U are zeroed.  */
#define _mm256_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
|
|
1856
|
|
/* Extract 128-bit lane C (0 or 1) of the 64-bit-integer vector X;
   all-ones mask keeps both extracted elements.  */
#define _mm256_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))

/* Merge-masking variant: elements with a clear bit in U come from W.  */
#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

/* Zero-masking variant: elements with a clear bit in U are zeroed.  */
#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
|
|
1868
|
|
/* VREDUCEPD: per-element reduction transform of A, controlled by the
   8-bit immediate B (rounding/precision selector passed straight to the
   builtin).  256-bit unmasked form.  */
#define _mm256_reduce_pd(A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

/* Merge-masking: elements with a clear bit in U come from W.  */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

/* Zero-masking: elements with a clear bit in U are zeroed.  */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

/* 128-bit forms of the same operation.  */
#define _mm_reduce_pd(A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
|
|
1892
|
|
/* VREDUCEPS: single-precision counterpart of the reduce_pd macros above;
   immediate B is forwarded unchanged to the builtin.  256-bit forms.  */
#define _mm256_reduce_ps(A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

/* Merge-masking: elements with a clear bit in U come from W.  */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

/* Zero-masking: elements with a clear bit in U are zeroed.  */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

/* 128-bit forms of the same operation.  */
#define _mm_reduce_ps(A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
|
|
1916
|
|
/* VRANGEPD/VRANGEPS: per-element range operation on A and B; the 8-bit
   immediate C selects the operation (min/max/abs variants) and sign
   control, forwarded unchanged to the builtin.  Unmasked forms use an
   all-ones mask; maskz forms pass a zero vector as the merge source;
   mask forms pass W.  NOTE: the mask_range_pd variants are defined
   after the ps group below — order kept as in the original file.  */
#define _mm256_range_pd(A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

/* Single-precision forms.  */
#define _mm256_range_ps(A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

/* Remaining double-precision masked forms (defined out of order in the
   original header).  */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
|
|
1976
|
|
/* VFPCLASSPD/VFPCLASSPS: classify each element of X against the
   categories selected by the 8-bit immediate C (QNaN, zero, infinity,
   denormal, negative, ...), producing one result bit per element.
   The masked forms AND the result with the incoming mask u; the
   unmasked forms pass an all-ones mask.  */
#define _mm256_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
    (int) (C),(__mmask8)(u)))

/* Unmasked forms: classify every element.  */
#define _mm256_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
    (int) (C),(__mmask8)-1))
|
|
2008
|
|
2009 #endif
|
|
2010
|
|
2011 #ifdef __DISABLE_AVX512VLDQ__
|
|
2012 #undef __DISABLE_AVX512VLDQ__
|
|
2013 #pragma GCC pop_options
|
|
2014 #endif /* __DISABLE_AVX512VLDQ__ */
|
|
2015
|
|
2016 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */
|