Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/i386/emmintrin.h @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
rev | line source |
---|---|
131 | 1 /* Copyright (C) 2003-2018 Free Software Foundation, Inc. |
0 | 2 |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify | |
6 it under the terms of the GNU General Public License as published by | |
7 the Free Software Foundation; either version 3, or (at your option) | |
8 any later version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU General Public License for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 /* Implemented from the specification included in the Intel C++ Compiler | |
25 User Guide and Reference, version 9.0. */ | |
26 | |
27 #ifndef _EMMINTRIN_H_INCLUDED | |
28 #define _EMMINTRIN_H_INCLUDED | |
29 | |
/* We need definitions from the SSE header files.  */
31 #include <xmmintrin.h> | |
32 | |
111 | 33 #ifndef __SSE2__ |
34 #pragma GCC push_options | |
35 #pragma GCC target("sse2") | |
36 #define __DISABLE_SSE2__ | |
37 #endif /* __SSE2__ */ | |
38 | |
/* SSE2 */
/* Internal 16-byte vector views, listed from the narrowest element type to
   the widest.  Each signed/plain type is followed by its unsigned twin.  */

/* char elements.  */
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));

/* short elements.  */
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));

/* int elements.  */
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));

/* long long elements.  */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));

/* double elements.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
0 | 49 |
/* Public 128-bit vector types.  The Intel API lets these be accessed through
   other vector types and through their scalar components, so both carry
   __may_alias__.  */
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
54 | |
/* Same layout and aliasing rules as __m128d / __m128i, but declared with
   __aligned__ (1) so that pointers to them may address unaligned storage.  */
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
58 | |
/* Build a selector for use with the SHUFPD instruction: FP1 lands in bit 1
   and FP0 in bit 0 of the result.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 ((fp0) | ((fp1) << 1))
62 | |
63 /* Create a vector with element 0 as F and the rest zero. */ | |
64 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
65 _mm_set_sd (double __F) | |
66 { | |
67 return __extension__ (__m128d){ __F, 0.0 }; | |
68 } | |
69 | |
70 /* Create a vector with both elements equal to F. */ | |
71 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
72 _mm_set1_pd (double __F) | |
73 { | |
74 return __extension__ (__m128d){ __F, __F }; | |
75 } | |
76 | |
77 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
78 _mm_set_pd1 (double __F) | |
79 { | |
80 return _mm_set1_pd (__F); | |
81 } | |
82 | |
83 /* Create a vector with the lower value X and upper value W. */ | |
84 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
85 _mm_set_pd (double __W, double __X) | |
86 { | |
87 return __extension__ (__m128d){ __X, __W }; | |
88 } | |
89 | |
90 /* Create a vector with the lower value W and upper value X. */ | |
91 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
92 _mm_setr_pd (double __W, double __X) | |
93 { | |
94 return __extension__ (__m128d){ __W, __X }; | |
95 } | |
96 | |
111 | 97 /* Create an undefined vector. */ |
98 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
99 _mm_undefined_pd (void) | |
100 { | |
101 __m128d __Y = __Y; | |
102 return __Y; | |
103 } | |
104 | |
0 | 105 /* Create a vector of zeros. */ |
106 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
107 _mm_setzero_pd (void) | |
108 { | |
109 return __extension__ (__m128d){ 0.0, 0.0 }; | |
110 } | |
111 | |
112 /* Sets the low DPFP value of A from the low value of B. */ | |
113 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
114 _mm_move_sd (__m128d __A, __m128d __B) | |
115 { | |
131 | 116 return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1}); |
0 | 117 } |
118 | |
119 /* Load two DPFP values from P. The address must be 16-byte aligned. */ | |
120 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
121 _mm_load_pd (double const *__P) | |
122 { | |
123 return *(__m128d *)__P; | |
124 } | |
125 | |
126 /* Load two DPFP values from P. The address need not be 16-byte aligned. */ | |
127 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
128 _mm_loadu_pd (double const *__P) | |
129 { | |
111 | 130 return *(__m128d_u *)__P; |
0 | 131 } |
132 | |
133 /* Create a vector with all two elements equal to *P. */ | |
134 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
135 _mm_load1_pd (double const *__P) | |
136 { | |
137 return _mm_set1_pd (*__P); | |
138 } | |
139 | |
140 /* Create a vector with element 0 as *P and the rest zero. */ | |
141 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
142 _mm_load_sd (double const *__P) | |
143 { | |
144 return _mm_set_sd (*__P); | |
145 } | |
146 | |
147 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
148 _mm_load_pd1 (double const *__P) | |
149 { | |
150 return _mm_load1_pd (__P); | |
151 } | |
152 | |
153 /* Load two DPFP values in reverse order. The address must be aligned. */ | |
154 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
155 _mm_loadr_pd (double const *__P) | |
156 { | |
157 __m128d __tmp = _mm_load_pd (__P); | |
158 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); | |
159 } | |
160 | |
161 /* Store two DPFP values. The address must be 16-byte aligned. */ | |
162 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
163 _mm_store_pd (double *__P, __m128d __A) | |
164 { | |
165 *(__m128d *)__P = __A; | |
166 } | |
167 | |
168 /* Store two DPFP values. The address need not be 16-byte aligned. */ | |
169 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
170 _mm_storeu_pd (double *__P, __m128d __A) | |
171 { | |
111 | 172 *(__m128d_u *)__P = __A; |
0 | 173 } |
174 | |
175 /* Stores the lower DPFP value. */ | |
176 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
177 _mm_store_sd (double *__P, __m128d __A) | |
178 { | |
111 | 179 *__P = ((__v2df)__A)[0]; |
0 | 180 } |
181 | |
182 extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
183 _mm_cvtsd_f64 (__m128d __A) | |
184 { | |
111 | 185 return ((__v2df)__A)[0]; |
0 | 186 } |
187 | |
188 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
189 _mm_storel_pd (double *__P, __m128d __A) | |
190 { | |
191 _mm_store_sd (__P, __A); | |
192 } | |
193 | |
194 /* Stores the upper DPFP value. */ | |
195 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
196 _mm_storeh_pd (double *__P, __m128d __A) | |
197 { | |
111 | 198 *__P = ((__v2df)__A)[1]; |
0 | 199 } |
200 | |
201 /* Store the lower DPFP value across two words. | |
202 The address must be 16-byte aligned. */ | |
203 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
204 _mm_store1_pd (double *__P, __m128d __A) | |
205 { | |
206 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); | |
207 } | |
208 | |
209 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
210 _mm_store_pd1 (double *__P, __m128d __A) | |
211 { | |
212 _mm_store1_pd (__P, __A); | |
213 } | |
214 | |
215 /* Store two DPFP values in reverse order. The address must be aligned. */ | |
216 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
217 _mm_storer_pd (double *__P, __m128d __A) | |
218 { | |
219 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); | |
220 } | |
221 | |
222 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
223 _mm_cvtsi128_si32 (__m128i __A) | |
224 { | |
225 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); | |
226 } | |
227 | |
228 #ifdef __x86_64__ | |
229 /* Intel intrinsic. */ | |
230 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
231 _mm_cvtsi128_si64 (__m128i __A) | |
232 { | |
111 | 233 return ((__v2di)__A)[0]; |
0 | 234 } |
235 | |
236 /* Microsoft intrinsic. */ | |
237 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
238 _mm_cvtsi128_si64x (__m128i __A) | |
239 { | |
111 | 240 return ((__v2di)__A)[0]; |
0 | 241 } |
242 #endif | |
243 | |
244 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
245 _mm_add_pd (__m128d __A, __m128d __B) | |
246 { | |
111 | 247 return (__m128d) ((__v2df)__A + (__v2df)__B); |
0 | 248 } |
249 | |
250 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
251 _mm_add_sd (__m128d __A, __m128d __B) | |
252 { | |
253 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); | |
254 } | |
255 | |
256 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
257 _mm_sub_pd (__m128d __A, __m128d __B) | |
258 { | |
111 | 259 return (__m128d) ((__v2df)__A - (__v2df)__B); |
0 | 260 } |
261 | |
262 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
263 _mm_sub_sd (__m128d __A, __m128d __B) | |
264 { | |
265 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); | |
266 } | |
267 | |
268 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
269 _mm_mul_pd (__m128d __A, __m128d __B) | |
270 { | |
111 | 271 return (__m128d) ((__v2df)__A * (__v2df)__B); |
0 | 272 } |
273 | |
274 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
275 _mm_mul_sd (__m128d __A, __m128d __B) | |
276 { | |
277 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); | |
278 } | |
279 | |
280 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
281 _mm_div_pd (__m128d __A, __m128d __B) | |
282 { | |
111 | 283 return (__m128d) ((__v2df)__A / (__v2df)__B); |
0 | 284 } |
285 | |
286 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
287 _mm_div_sd (__m128d __A, __m128d __B) | |
288 { | |
289 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); | |
290 } | |
291 | |
292 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
293 _mm_sqrt_pd (__m128d __A) | |
294 { | |
295 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); | |
296 } | |
297 | |
111 | 298 /* Return pair {sqrt (B[0]), A[1]}. */ |
0 | 299 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
300 _mm_sqrt_sd (__m128d __A, __m128d __B) | |
301 { | |
302 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); | |
303 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); | |
304 } | |
305 | |
306 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
307 _mm_min_pd (__m128d __A, __m128d __B) | |
308 { | |
309 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); | |
310 } | |
311 | |
312 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
313 _mm_min_sd (__m128d __A, __m128d __B) | |
314 { | |
315 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); | |
316 } | |
317 | |
318 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
319 _mm_max_pd (__m128d __A, __m128d __B) | |
320 { | |
321 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); | |
322 } | |
323 | |
324 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
325 _mm_max_sd (__m128d __A, __m128d __B) | |
326 { | |
327 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); | |
328 } | |
329 | |
330 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
331 _mm_and_pd (__m128d __A, __m128d __B) | |
332 { | |
333 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); | |
334 } | |
335 | |
336 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
337 _mm_andnot_pd (__m128d __A, __m128d __B) | |
338 { | |
339 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); | |
340 } | |
341 | |
342 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
343 _mm_or_pd (__m128d __A, __m128d __B) | |
344 { | |
345 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); | |
346 } | |
347 | |
348 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
349 _mm_xor_pd (__m128d __A, __m128d __B) | |
350 { | |
351 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); | |
352 } | |
353 | |
354 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
355 _mm_cmpeq_pd (__m128d __A, __m128d __B) | |
356 { | |
357 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); | |
358 } | |
359 | |
360 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
361 _mm_cmplt_pd (__m128d __A, __m128d __B) | |
362 { | |
363 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); | |
364 } | |
365 | |
366 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
367 _mm_cmple_pd (__m128d __A, __m128d __B) | |
368 { | |
369 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); | |
370 } | |
371 | |
372 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
373 _mm_cmpgt_pd (__m128d __A, __m128d __B) | |
374 { | |
375 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); | |
376 } | |
377 | |
378 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
379 _mm_cmpge_pd (__m128d __A, __m128d __B) | |
380 { | |
381 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); | |
382 } | |
383 | |
384 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
385 _mm_cmpneq_pd (__m128d __A, __m128d __B) | |
386 { | |
387 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); | |
388 } | |
389 | |
390 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
391 _mm_cmpnlt_pd (__m128d __A, __m128d __B) | |
392 { | |
393 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); | |
394 } | |
395 | |
396 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
397 _mm_cmpnle_pd (__m128d __A, __m128d __B) | |
398 { | |
399 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); | |
400 } | |
401 | |
402 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
403 _mm_cmpngt_pd (__m128d __A, __m128d __B) | |
404 { | |
405 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); | |
406 } | |
407 | |
408 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
409 _mm_cmpnge_pd (__m128d __A, __m128d __B) | |
410 { | |
411 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); | |
412 } | |
413 | |
414 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
415 _mm_cmpord_pd (__m128d __A, __m128d __B) | |
416 { | |
417 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); | |
418 } | |
419 | |
420 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
421 _mm_cmpunord_pd (__m128d __A, __m128d __B) | |
422 { | |
423 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); | |
424 } | |
425 | |
426 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
427 _mm_cmpeq_sd (__m128d __A, __m128d __B) | |
428 { | |
429 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); | |
430 } | |
431 | |
432 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
433 _mm_cmplt_sd (__m128d __A, __m128d __B) | |
434 { | |
435 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); | |
436 } | |
437 | |
438 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
439 _mm_cmple_sd (__m128d __A, __m128d __B) | |
440 { | |
441 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); | |
442 } | |
443 | |
444 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
445 _mm_cmpgt_sd (__m128d __A, __m128d __B) | |
446 { | |
447 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
448 (__v2df) | |
449 __builtin_ia32_cmpltsd ((__v2df) __B, | |
450 (__v2df) | |
451 __A)); | |
452 } | |
453 | |
454 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
455 _mm_cmpge_sd (__m128d __A, __m128d __B) | |
456 { | |
457 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
458 (__v2df) | |
459 __builtin_ia32_cmplesd ((__v2df) __B, | |
460 (__v2df) | |
461 __A)); | |
462 } | |
463 | |
464 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
465 _mm_cmpneq_sd (__m128d __A, __m128d __B) | |
466 { | |
467 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); | |
468 } | |
469 | |
470 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
471 _mm_cmpnlt_sd (__m128d __A, __m128d __B) | |
472 { | |
473 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); | |
474 } | |
475 | |
476 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
477 _mm_cmpnle_sd (__m128d __A, __m128d __B) | |
478 { | |
479 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); | |
480 } | |
481 | |
482 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
483 _mm_cmpngt_sd (__m128d __A, __m128d __B) | |
484 { | |
485 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
486 (__v2df) | |
487 __builtin_ia32_cmpnltsd ((__v2df) __B, | |
488 (__v2df) | |
489 __A)); | |
490 } | |
491 | |
492 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
493 _mm_cmpnge_sd (__m128d __A, __m128d __B) | |
494 { | |
495 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
496 (__v2df) | |
497 __builtin_ia32_cmpnlesd ((__v2df) __B, | |
498 (__v2df) | |
499 __A)); | |
500 } | |
501 | |
502 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
503 _mm_cmpord_sd (__m128d __A, __m128d __B) | |
504 { | |
505 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); | |
506 } | |
507 | |
508 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
509 _mm_cmpunord_sd (__m128d __A, __m128d __B) | |
510 { | |
511 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); | |
512 } | |
513 | |
514 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
515 _mm_comieq_sd (__m128d __A, __m128d __B) | |
516 { | |
517 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); | |
518 } | |
519 | |
520 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
521 _mm_comilt_sd (__m128d __A, __m128d __B) | |
522 { | |
523 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); | |
524 } | |
525 | |
526 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
527 _mm_comile_sd (__m128d __A, __m128d __B) | |
528 { | |
529 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); | |
530 } | |
531 | |
532 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
533 _mm_comigt_sd (__m128d __A, __m128d __B) | |
534 { | |
535 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); | |
536 } | |
537 | |
538 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
539 _mm_comige_sd (__m128d __A, __m128d __B) | |
540 { | |
541 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); | |
542 } | |
543 | |
544 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
545 _mm_comineq_sd (__m128d __A, __m128d __B) | |
546 { | |
547 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); | |
548 } | |
549 | |
550 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
551 _mm_ucomieq_sd (__m128d __A, __m128d __B) | |
552 { | |
553 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); | |
554 } | |
555 | |
556 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
557 _mm_ucomilt_sd (__m128d __A, __m128d __B) | |
558 { | |
559 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); | |
560 } | |
561 | |
562 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
563 _mm_ucomile_sd (__m128d __A, __m128d __B) | |
564 { | |
565 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); | |
566 } | |
567 | |
568 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
569 _mm_ucomigt_sd (__m128d __A, __m128d __B) | |
570 { | |
571 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); | |
572 } | |
573 | |
574 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
575 _mm_ucomige_sd (__m128d __A, __m128d __B) | |
576 { | |
577 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); | |
578 } | |
579 | |
580 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
581 _mm_ucomineq_sd (__m128d __A, __m128d __B) | |
582 { | |
583 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); | |
584 } | |
585 | |
586 /* Create a vector of Qi, where i is the element number. */ | |
587 | |
588 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
589 _mm_set_epi64x (long long __q1, long long __q0) | |
590 { | |
591 return __extension__ (__m128i)(__v2di){ __q0, __q1 }; | |
592 } | |
593 | |
594 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
595 _mm_set_epi64 (__m64 __q1, __m64 __q0) | |
596 { | |
597 return _mm_set_epi64x ((long long)__q1, (long long)__q0); | |
598 } | |
599 | |
600 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
601 _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) | |
602 { | |
603 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; | |
604 } | |
605 | |
606 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
607 _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, | |
608 short __q3, short __q2, short __q1, short __q0) | |
609 { | |
610 return __extension__ (__m128i)(__v8hi){ | |
611 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; | |
612 } | |
613 | |
614 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
615 _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, | |
616 char __q11, char __q10, char __q09, char __q08, | |
617 char __q07, char __q06, char __q05, char __q04, | |
618 char __q03, char __q02, char __q01, char __q00) | |
619 { | |
620 return __extension__ (__m128i)(__v16qi){ | |
621 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, | |
622 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 | |
623 }; | |
624 } | |
625 | |
626 /* Set all of the elements of the vector to A. */ | |
627 | |
628 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
629 _mm_set1_epi64x (long long __A) | |
630 { | |
631 return _mm_set_epi64x (__A, __A); | |
632 } | |
633 | |
634 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
635 _mm_set1_epi64 (__m64 __A) | |
636 { | |
637 return _mm_set_epi64 (__A, __A); | |
638 } | |
639 | |
640 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
641 _mm_set1_epi32 (int __A) | |
642 { | |
643 return _mm_set_epi32 (__A, __A, __A, __A); | |
644 } | |
645 | |
646 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
647 _mm_set1_epi16 (short __A) | |
648 { | |
649 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); | |
650 } | |
651 | |
652 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
653 _mm_set1_epi8 (char __A) | |
654 { | |
655 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, | |
656 __A, __A, __A, __A, __A, __A, __A, __A); | |
657 } | |
658 | |
659 /* Create a vector of Qi, where i is the element number. | |
660 The parameter order is reversed from the _mm_set_epi* functions. */ | |
661 | |
662 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
663 _mm_setr_epi64 (__m64 __q0, __m64 __q1) | |
664 { | |
665 return _mm_set_epi64 (__q1, __q0); | |
666 } | |
667 | |
668 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
669 _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) | |
670 { | |
671 return _mm_set_epi32 (__q3, __q2, __q1, __q0); | |
672 } | |
673 | |
674 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
675 _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, | |
676 short __q4, short __q5, short __q6, short __q7) | |
677 { | |
678 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); | |
679 } | |
680 | |
681 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
682 _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, | |
683 char __q04, char __q05, char __q06, char __q07, | |
684 char __q08, char __q09, char __q10, char __q11, | |
685 char __q12, char __q13, char __q14, char __q15) | |
686 { | |
687 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, | |
688 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); | |
689 } | |
690 | |
/* Load the 128-bit integer vector stored at *P.  */
692 | |
693 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
694 _mm_load_si128 (__m128i const *__P) | |
695 { | |
696 return *__P; | |
697 } | |
698 | |
699 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
111 | 700 _mm_loadu_si128 (__m128i_u const *__P) |
0 | 701 { |
111 | 702 return *__P; |
0 | 703 } |
704 | |
705 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
111 | 706 _mm_loadl_epi64 (__m128i_u const *__P) |
0 | 707 { |
111 | 708 return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P); |
0 | 709 } |
710 | |
711 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
712 _mm_store_si128 (__m128i *__P, __m128i __B) | |
713 { | |
714 *__P = __B; | |
715 } | |
716 | |
717 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
111 | 718 _mm_storeu_si128 (__m128i_u *__P, __m128i __B) |
0 | 719 { |
111 | 720 *__P = __B; |
0 | 721 } |
722 | |
723 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
111 | 724 _mm_storel_epi64 (__m128i_u *__P, __m128i __B) |
0 | 725 { |
111 | 726 *(__m64_u *)__P = (__m64) ((__v2di)__B)[0]; |
0 | 727 } |
728 | |
729 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
730 _mm_movepi64_pi64 (__m128i __B) | |
731 { | |
111 | 732 return (__m64) ((__v2di)__B)[0]; |
0 | 733 } |
734 | |
735 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
736 _mm_movpi64_epi64 (__m64 __A) | |
737 { | |
738 return _mm_set_epi64 ((__m64)0LL, __A); | |
739 } | |
740 | |
741 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
742 _mm_move_epi64 (__m128i __A) | |
743 { | |
744 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); | |
745 } | |
746 | |
111 | 747 /* Create an undefined vector. */ |
748 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
749 _mm_undefined_si128 (void) | |
750 { | |
751 __m128i __Y = __Y; | |
752 return __Y; | |
753 } | |
754 | |
0 | 755 /* Create a vector of zeros. */ |
756 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
757 _mm_setzero_si128 (void) | |
758 { | |
759 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; | |
760 } | |
761 | |
762 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
763 _mm_cvtepi32_pd (__m128i __A) | |
764 { | |
765 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); | |
766 } | |
767 | |
768 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
769 _mm_cvtepi32_ps (__m128i __A) | |
770 { | |
771 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); | |
772 } | |
773 | |
774 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
775 _mm_cvtpd_epi32 (__m128d __A) | |
776 { | |
777 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); | |
778 } | |
779 | |
780 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
781 _mm_cvtpd_pi32 (__m128d __A) | |
782 { | |
783 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); | |
784 } | |
785 | |
/* Pass __A, viewed as __v2df, to __builtin_ia32_cvtpd2ps and return the
   result as __m128.  Presumably narrows both doubles to floats in the
   low lanes -- confirm.  */
786 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
787 _mm_cvtpd_ps (__m128d __A) | |
788 { | |
789 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); | |
790 } | |
791 | |
/* Pass __A, viewed as __v2df, to __builtin_ia32_cvttpd2dq and return
   the result as __m128i.  Presumably the truncating counterpart of
   _mm_cvtpd_epi32 -- confirm.  */
792 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
793 _mm_cvttpd_epi32 (__m128d __A) | |
794 { | |
795 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); | |
796 } | |
797 | |
/* Pass __A, viewed as __v2df, to __builtin_ia32_cvttpd2pi and return
   the 64-bit result as __m64.  Presumably the truncating counterpart
   of _mm_cvtpd_pi32 -- confirm.  */
798 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
799 _mm_cvttpd_pi32 (__m128d __A) | |
800 { | |
801 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); | |
802 } | |
803 | |
/* Pass the 64-bit __A, viewed as __v2si, to __builtin_ia32_cvtpi2pd and
   return the result as __m128d.  Presumably int32 -> double for both
   lanes -- confirm.  */
804 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
805 _mm_cvtpi32_pd (__m64 __A) | |
806 { | |
807 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); | |
808 } | |
809 | |
/* Pass __A, viewed as __v4sf, to __builtin_ia32_cvtps2dq and return the
   result as __m128i.  Presumably float -> int32 using the current
   rounding mode -- confirm.  */
810 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
811 _mm_cvtps_epi32 (__m128 __A) | |
812 { | |
813 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); | |
814 } | |
815 | |
/* Pass __A, viewed as __v4sf, to __builtin_ia32_cvttps2dq and return
   the result as __m128i.  Presumably the truncating counterpart of
   _mm_cvtps_epi32 -- confirm.  */
816 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
817 _mm_cvttps_epi32 (__m128 __A) | |
818 { | |
819 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); | |
820 } | |
821 | |
/* Pass __A, viewed as __v4sf, to __builtin_ia32_cvtps2pd and return the
   result as __m128d.  Presumably widens the two low floats to doubles
   -- confirm.  */
822 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
823 _mm_cvtps_pd (__m128 __A) | |
824 { | |
825 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); | |
826 } | |
827 | |
/* Return the int produced by __builtin_ia32_cvtsd2si for __A viewed as
   __v2df.  Presumably converts only the low double -- confirm.  */
828 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
829 _mm_cvtsd_si32 (__m128d __A) | |
830 { | |
831 return __builtin_ia32_cvtsd2si ((__v2df) __A); | |
832 } | |
833 | |
/* 64-bit-only pair: both spellings have identical bodies and return the
   long long produced by __builtin_ia32_cvtsd2si64 for __A viewed as
   __v2df.  Two names are provided so that code written against either
   vendor's spelling compiles.  */
834 #ifdef __x86_64__ | |
835 /* Intel intrinsic. */ | |
836 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
837 _mm_cvtsd_si64 (__m128d __A) | |
838 { | |
839 return __builtin_ia32_cvtsd2si64 ((__v2df) __A); | |
840 } | |
841 | |
842 /* Microsoft intrinsic. */ | |
843 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
844 _mm_cvtsd_si64x (__m128d __A) | |
845 { | |
846 return __builtin_ia32_cvtsd2si64 ((__v2df) __A); | |
847 } | |
848 #endif | |
849 | |
/* Return the int produced by __builtin_ia32_cvttsd2si for __A viewed as
   __v2df.  Presumably the truncating counterpart of _mm_cvtsd_si32 --
   confirm.  */
850 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
851 _mm_cvttsd_si32 (__m128d __A) | |
852 { | |
853 return __builtin_ia32_cvttsd2si ((__v2df) __A); | |
854 } | |
855 | |
/* 64-bit-only pair: both spellings have identical bodies and return the
   long long produced by __builtin_ia32_cvttsd2si64 for __A viewed as
   __v2df.  */
856 #ifdef __x86_64__ | |
857 /* Intel intrinsic. */ | |
858 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
859 _mm_cvttsd_si64 (__m128d __A) | |
860 { | |
861 return __builtin_ia32_cvttsd2si64 ((__v2df) __A); | |
862 } | |
863 | |
864 /* Microsoft intrinsic. */ | |
865 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
866 _mm_cvttsd_si64x (__m128d __A) | |
867 { | |
868 return __builtin_ia32_cvttsd2si64 ((__v2df) __A); | |
869 } | |
870 #endif | |
871 | |
/* Pass __A (as __v4sf) and __B (as __v2df) to __builtin_ia32_cvtsd2ss
   and return the result as __m128.  Presumably the low double of __B
   is narrowed into the low float while the other lanes come from __A
   -- confirm.  */
872 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
873 _mm_cvtsd_ss (__m128 __A, __m128d __B) | |
874 { | |
875 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); | |
876 } | |
877 | |
/* Pass __A (as __v2df) and the scalar __B to __builtin_ia32_cvtsi2sd
   and return the result as __m128d.  Presumably __B is converted into
   the low double and the high double is taken from __A -- confirm.  */
878 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
879 _mm_cvtsi32_sd (__m128d __A, int __B) | |
880 { | |
881 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); | |
882 } | |
883 | |
/* 64-bit-only pair: both spellings have identical bodies, passing __A
   (as __v2df) and the long long __B to __builtin_ia32_cvtsi642sd and
   returning the result as __m128d.  */
884 #ifdef __x86_64__ | |
885 /* Intel intrinsic. */ | |
886 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
887 _mm_cvtsi64_sd (__m128d __A, long long __B) | |
888 { | |
889 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); | |
890 } | |
891 | |
892 /* Microsoft intrinsic. */ | |
893 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
894 _mm_cvtsi64x_sd (__m128d __A, long long __B) | |
895 { | |
896 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); | |
897 } | |
898 #endif | |
899 | |
/* Pass __A (as __v2df) and __B (as __v4sf) to __builtin_ia32_cvtss2sd
   and return the result as __m128d.  Presumably the low float of __B
   is widened into the low double while the high double comes from __A
   -- confirm.  */
900 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
901 _mm_cvtss_sd (__m128d __A, __m128 __B) | |
902 { | |
903 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); | |
904 } | |
905 | |
/* _mm_shuffle_pd forwards both operands (as __v2df) and a selector to
   __builtin_ia32_shufpd.  When __OPTIMIZE__ is defined it is an inline
   function taking the selector as `const int __mask'; otherwise it is a
   macro that casts its arguments and passes N straight to the builtin.
   NOTE(review): the two forms presumably exist because the builtin
   needs a compile-time constant selector -- confirm.  */
906 #ifdef __OPTIMIZE__ | |
907 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
908 _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) | |
909 { | |
910 return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); | |
911 } | |
912 #else | |
913 #define _mm_shuffle_pd(A, B, N) \ | |
914 ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \ | |
915 (__v2df)(__m128d)(B), (int)(N))) | |
916 #endif | |
917 | |
/* Pass both operands (as __v2df) to __builtin_ia32_unpckhpd and return
   the result as __m128d.  Presumably pairs the high doubles of __A and
   __B -- confirm.  */
918 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
919 _mm_unpackhi_pd (__m128d __A, __m128d __B) | |
920 { | |
921 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); | |
922 } | |
923 | |
/* Pass both operands (as __v2df) to __builtin_ia32_unpcklpd and return
   the result as __m128d.  Presumably pairs the low doubles of __A and
   __B -- confirm.  */
924 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
925 _mm_unpacklo_pd (__m128d __A, __m128d __B) | |
926 { | |
927 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); | |
928 } | |
929 | |
/* Pass __A (as __v2df) and the pointer __B to __builtin_ia32_loadhpd
   and return the result as __m128d.  Presumably replaces the high
   double of __A with the double read from __B -- confirm.  */
930 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
931 _mm_loadh_pd (__m128d __A, double const *__B) | |
932 { | |
933 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); | |
934 } | |
935 | |
/* Pass __A (as __v2df) and the pointer __B to __builtin_ia32_loadlpd
   and return the result as __m128d.  Presumably replaces the low
   double of __A with the double read from __B -- confirm.  */
936 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
937 _mm_loadl_pd (__m128d __A, double const *__B) | |
938 { | |
939 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); | |
940 } | |
941 | |
/* Return the int produced by __builtin_ia32_movmskpd for __A viewed as
   __v2df.  Presumably a bit mask built from the sign bits of the two
   doubles -- confirm.  */
942 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
943 _mm_movemask_pd (__m128d __A) | |
944 { | |
945 return __builtin_ia32_movmskpd ((__v2df)__A); | |
946 } | |
947 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_packsswb128 and
   return the result as __m128i.  Presumably narrows 16-bit lanes to
   8-bit with signed saturation -- confirm.  */
948 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
949 _mm_packs_epi16 (__m128i __A, __m128i __B) | |
950 { | |
951 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); | |
952 } | |
953 | |
/* Pass both operands (as __v4si) to __builtin_ia32_packssdw128 and
   return the result as __m128i.  Presumably narrows 32-bit lanes to
   16-bit with signed saturation -- confirm.  */
954 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
955 _mm_packs_epi32 (__m128i __A, __m128i __B) | |
956 { | |
957 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); | |
958 } | |
959 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_packuswb128 and
   return the result as __m128i.  Presumably narrows 16-bit lanes to
   8-bit with unsigned saturation -- confirm.  */
960 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
961 _mm_packus_epi16 (__m128i __A, __m128i __B) | |
962 { | |
963 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); | |
964 } | |
965 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_punpckhbw128 and
   return the result as __m128i.  Presumably interleaves the high-half
   bytes of __A and __B -- confirm.  */
966 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
967 _mm_unpackhi_epi8 (__m128i __A, __m128i __B) | |
968 { | |
969 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); | |
970 } | |
971 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_punpckhwd128 and
   return the result as __m128i.  Presumably interleaves the high-half
   16-bit lanes of __A and __B -- confirm.  */
972 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
973 _mm_unpackhi_epi16 (__m128i __A, __m128i __B) | |
974 { | |
975 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); | |
976 } | |
977 | |
/* Pass both operands (as __v4si) to __builtin_ia32_punpckhdq128 and
   return the result as __m128i.  Presumably interleaves the high-half
   32-bit lanes of __A and __B -- confirm.  */
978 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
979 _mm_unpackhi_epi32 (__m128i __A, __m128i __B) | |
980 { | |
981 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); | |
982 } | |
983 | |
/* Pass both operands (as __v2di) to __builtin_ia32_punpckhqdq128 and
   return the result as __m128i.  Presumably pairs the high 64-bit
   lanes of __A and __B -- confirm.  */
984 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
985 _mm_unpackhi_epi64 (__m128i __A, __m128i __B) | |
986 { | |
987 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); | |
988 } | |
989 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_punpcklbw128 and
   return the result as __m128i.  Presumably interleaves the low-half
   bytes of __A and __B -- confirm.  */
990 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
991 _mm_unpacklo_epi8 (__m128i __A, __m128i __B) | |
992 { | |
993 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); | |
994 } | |
995 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_punpcklwd128 and
   return the result as __m128i.  Presumably interleaves the low-half
   16-bit lanes of __A and __B -- confirm.  */
996 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
997 _mm_unpacklo_epi16 (__m128i __A, __m128i __B) | |
998 { | |
999 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); | |
1000 } | |
1001 | |
/* Pass both operands (as __v4si) to __builtin_ia32_punpckldq128 and
   return the result as __m128i.  Presumably interleaves the low-half
   32-bit lanes of __A and __B -- confirm.  */
1002 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1003 _mm_unpacklo_epi32 (__m128i __A, __m128i __B) | |
1004 { | |
1005 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); | |
1006 } | |
1007 | |
/* Pass both operands (as __v2di) to __builtin_ia32_punpcklqdq128 and
   return the result as __m128i.  Presumably pairs the low 64-bit lanes
   of __A and __B -- confirm.  */
1008 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1009 _mm_unpacklo_epi64 (__m128i __A, __m128i __B) | |
1010 { | |
1011 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); | |
1012 } | |
1013 | |
/* Lane-wise sum, written with the GNU vector `+' operator on __v16qu
   views of both operands rather than a builtin.
   NOTE(review): __v16qu is declared outside this excerpt; presumably
   an unsigned 8-bit element type so that overflow wraps -- confirm.  */
1014 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1015 _mm_add_epi8 (__m128i __A, __m128i __B) | |
1016 { | |
111 | 1017 return (__m128i) ((__v16qu)__A + (__v16qu)__B); |
0 | 1018 } |
1019 | |
/* Lane-wise sum, written with the GNU vector `+' operator on __v8hu
   views of both operands.
   NOTE(review): __v8hu is declared outside this excerpt; presumably an
   unsigned 16-bit element type so that overflow wraps -- confirm.  */
1020 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1021 _mm_add_epi16 (__m128i __A, __m128i __B) | |
1022 { | |
111 | 1023 return (__m128i) ((__v8hu)__A + (__v8hu)__B); |
0 | 1024 } |
1025 | |
/* Lane-wise sum, written with the GNU vector `+' operator on __v4su
   views of both operands.
   NOTE(review): __v4su is declared outside this excerpt; presumably an
   unsigned 32-bit element type so that overflow wraps -- confirm.  */
1026 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1027 _mm_add_epi32 (__m128i __A, __m128i __B) | |
1028 { | |
111 | 1029 return (__m128i) ((__v4su)__A + (__v4su)__B); |
0 | 1030 } |
1031 | |
/* Lane-wise sum, written with the GNU vector `+' operator on __v2du
   views of both operands.  __v2du is the two-lane unsigned long long
   vector type declared at the top of this header, so each lane's sum
   wraps on overflow.  */
1032 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1033 _mm_add_epi64 (__m128i __A, __m128i __B) | |
1034 { | |
111 | 1035 return (__m128i) ((__v2du)__A + (__v2du)__B); |
0 | 1036 } |
1037 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_paddsb128 and
   return the result as __m128i.  Presumably 8-bit addition with signed
   saturation -- confirm.  */
1038 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1039 _mm_adds_epi8 (__m128i __A, __m128i __B) | |
1040 { | |
1041 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); | |
1042 } | |
1043 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_paddsw128 and return
   the result as __m128i.  Presumably 16-bit addition with signed
   saturation -- confirm.  */
1044 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1045 _mm_adds_epi16 (__m128i __A, __m128i __B) | |
1046 { | |
1047 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); | |
1048 } | |
1049 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_paddusb128 and
   return the result as __m128i.  Presumably 8-bit addition with
   unsigned saturation -- confirm.  */
1050 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1051 _mm_adds_epu8 (__m128i __A, __m128i __B) | |
1052 { | |
1053 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); | |
1054 } | |
1055 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_paddusw128 and
   return the result as __m128i.  Presumably 16-bit addition with
   unsigned saturation -- confirm.  */
1056 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1057 _mm_adds_epu16 (__m128i __A, __m128i __B) | |
1058 { | |
1059 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); | |
1060 } | |
1061 | |
/* Lane-wise difference __A - __B, written with the GNU vector `-'
   operator on __v16qu views of both operands.
   NOTE(review): __v16qu is declared outside this excerpt; presumably
   an unsigned 8-bit element type so that underflow wraps -- confirm.  */
1062 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1063 _mm_sub_epi8 (__m128i __A, __m128i __B) | |
1064 { | |
111 | 1065 return (__m128i) ((__v16qu)__A - (__v16qu)__B); |
0 | 1066 } |
1067 | |
/* Lane-wise difference __A - __B, written with the GNU vector `-'
   operator on __v8hu views of both operands.
   NOTE(review): __v8hu is declared outside this excerpt; presumably an
   unsigned 16-bit element type so that underflow wraps -- confirm.  */
1068 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1069 _mm_sub_epi16 (__m128i __A, __m128i __B) | |
1070 { | |
111 | 1071 return (__m128i) ((__v8hu)__A - (__v8hu)__B); |
0 | 1072 } |
1073 | |
/* Lane-wise difference __A - __B, written with the GNU vector `-'
   operator on __v4su views of both operands.
   NOTE(review): __v4su is declared outside this excerpt; presumably an
   unsigned 32-bit element type so that underflow wraps -- confirm.  */
1074 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1075 _mm_sub_epi32 (__m128i __A, __m128i __B) | |
1076 { | |
111 | 1077 return (__m128i) ((__v4su)__A - (__v4su)__B); |
0 | 1078 } |
1079 | |
/* Lane-wise difference __A - __B, written with the GNU vector `-'
   operator on __v2du views of both operands.  __v2du is the two-lane
   unsigned long long vector type declared at the top of this header,
   so each lane's difference wraps on underflow.  */
1080 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1081 _mm_sub_epi64 (__m128i __A, __m128i __B) | |
1082 { | |
111 | 1083 return (__m128i) ((__v2du)__A - (__v2du)__B); |
0 | 1084 } |
1085 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_psubsb128 and
   return the result as __m128i.  Presumably 8-bit subtraction with
   signed saturation -- confirm.  */
1086 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1087 _mm_subs_epi8 (__m128i __A, __m128i __B) | |
1088 { | |
1089 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); | |
1090 } | |
1091 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_psubsw128 and return
   the result as __m128i.  Presumably 16-bit subtraction with signed
   saturation -- confirm.  */
1092 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1093 _mm_subs_epi16 (__m128i __A, __m128i __B) | |
1094 { | |
1095 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); | |
1096 } | |
1097 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_psubusb128 and
   return the result as __m128i.  Presumably 8-bit subtraction with
   unsigned saturation -- confirm.  */
1098 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1099 _mm_subs_epu8 (__m128i __A, __m128i __B) | |
1100 { | |
1101 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); | |
1102 } | |
1103 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_psubusw128 and
   return the result as __m128i.  Presumably 16-bit subtraction with
   unsigned saturation -- confirm.  */
1104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1105 _mm_subs_epu16 (__m128i __A, __m128i __B) | |
1106 { | |
1107 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); | |
1108 } | |
1109 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pmaddwd128 and
   return the result as __m128i.  Presumably multiplies 16-bit lanes
   and sums adjacent products into 32-bit lanes -- confirm.  */
1110 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1111 _mm_madd_epi16 (__m128i __A, __m128i __B) | |
1112 { | |
1113 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); | |
1114 } | |
1115 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pmulhw128 and return
   the result as __m128i.  Presumably the high 16 bits of each signed
   16x16 product -- confirm.  */
1116 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1117 _mm_mulhi_epi16 (__m128i __A, __m128i __B) | |
1118 { | |
1119 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); | |
1120 } | |
1121 | |
/* Lane-wise product, written with the GNU vector `*' operator on __v8hu
   views of both operands, so each lane keeps only the bits that fit
   the element type.
   NOTE(review): __v8hu is declared outside this excerpt; presumably an
   unsigned 16-bit element type -- confirm.  */
1122 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1123 _mm_mullo_epi16 (__m128i __A, __m128i __B) | |
1124 { | |
111 | 1125 return (__m128i) ((__v8hu)__A * (__v8hu)__B); |
0 | 1126 } |
1127 | |
/* 64-bit (__m64) variant: pass both operands (as __v2si) to
   __builtin_ia32_pmuludq and return the result as __m64.  Presumably
   an unsigned 32x32 -> 64 multiply of the low lanes -- confirm.  */
1128 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1129 _mm_mul_su32 (__m64 __A, __m64 __B) | |
1130 { | |
1131 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); | |
1132 } | |
1133 | |
/* Pass both operands (as __v4si) to __builtin_ia32_pmuludq128 and
   return the result as __m128i.  Presumably unsigned 32x32 -> 64
   multiplies of the even-indexed lanes -- confirm.  */
1134 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1135 _mm_mul_epu32 (__m128i __A, __m128i __B) | |
1136 { | |
1137 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); | |
1138 } | |
1139 | |
/* Pass __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psllwi128 and return the result as __m128i.
   Presumably a logical left shift of every 16-bit lane -- confirm.  */
1140 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1141 _mm_slli_epi16 (__m128i __A, int __B) | |
1142 { | |
1143 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); | |
1144 } | |
1145 | |
/* Pass __A (as __v4si) and the scalar count __B to
   __builtin_ia32_pslldi128 and return the result as __m128i.
   Presumably a logical left shift of every 32-bit lane -- confirm.  */
1146 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1147 _mm_slli_epi32 (__m128i __A, int __B) | |
1148 { | |
1149 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); | |
1150 } | |
1151 | |
/* Pass __A (as __v2di) and the scalar count __B to
   __builtin_ia32_psllqi128 and return the result as __m128i.
   Presumably a logical left shift of every 64-bit lane -- confirm.  */
1152 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1153 _mm_slli_epi64 (__m128i __A, int __B) | |
1154 { | |
1155 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); | |
1156 } | |
1157 | |
/* Pass __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psrawi128 and return the result as __m128i.
   Presumably an arithmetic right shift of every 16-bit lane --
   confirm.  */
1158 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1159 _mm_srai_epi16 (__m128i __A, int __B) | |
1160 { | |
1161 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); | |
1162 } | |
1163 | |
/* Pass __A (as __v4si) and the scalar count __B to
   __builtin_ia32_psradi128 and return the result as __m128i.
   Presumably an arithmetic right shift of every 32-bit lane --
   confirm.  */
1164 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1165 _mm_srai_epi32 (__m128i __A, int __B) | |
1166 { | |
1167 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); | |
1168 } | |
1169 | |
/* Whole-register shifts.  Each entry point multiplies its count by 8
   before handing it to __builtin_ia32_psrldqi128 (right) or
   __builtin_ia32_pslldqi128 (left).  Presumably the caller's count is
   in bytes and the builtin expects bits -- confirm.
   _mm_bsrli_si128 and _mm_srli_si128 have identical bodies, as do
   _mm_bslli_si128 and _mm_slli_si128.  When __OPTIMIZE__ is defined
   they are inline functions taking `const int __N'; otherwise they are
   macros that pass N to the builtin directly.  */
1170 #ifdef __OPTIMIZE__ | |
1171 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
111 | 1172 _mm_bsrli_si128 (__m128i __A, const int __N) |
1173 { | |
1174 return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); | |
1175 } | |
1176 | |
1177 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1178 _mm_bslli_si128 (__m128i __A, const int __N) | |
1179 { | |
1180 return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); | |
1181 } | |
1182 | |
1183 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
0 | 1184 _mm_srli_si128 (__m128i __A, const int __N) |
1185 { | |
1186 return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); | |
1187 } | |
1188 | |
1189 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1190 _mm_slli_si128 (__m128i __A, const int __N) | |
1191 { | |
1192 return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); | |
1193 } | |
1194 #else | |
111 | 1195 #define _mm_bsrli_si128(A, N) \ |
1196 ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8)) | |
1197 #define _mm_bslli_si128(A, N) \ | |
1198 ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) | |
0 | 1199 #define _mm_srli_si128(A, N) \ |
1200 ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8)) | |
1201 #define _mm_slli_si128(A, N) \ | |
1202 ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) | |
1203 #endif | |
1204 | |
/* Pass __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psrlwi128 and return the result as __m128i.
   Presumably a logical right shift of every 16-bit lane -- confirm.  */
1205 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1206 _mm_srli_epi16 (__m128i __A, int __B) | |
1207 { | |
1208 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); | |
1209 } | |
1210 | |
/* Pass __A (as __v4si) and the scalar count __B to
   __builtin_ia32_psrldi128 and return the result as __m128i.
   Presumably a logical right shift of every 32-bit lane -- confirm.  */
1211 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1212 _mm_srli_epi32 (__m128i __A, int __B) | |
1213 { | |
1214 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); | |
1215 } | |
1216 | |
/* Pass __A (as __v2di) and the scalar count __B to
   __builtin_ia32_psrlqi128 and return the result as __m128i.
   Presumably a logical right shift of every 64-bit lane -- confirm.  */
1217 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1218 _mm_srli_epi64 (__m128i __A, int __B) | |
1219 { | |
1220 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); | |
1221 } | |
1222 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v8hi) to __builtin_ia32_psllw128 and return the result as
   __m128i.  Presumably a left shift of every 16-bit lane by the count
   held in __B -- confirm.  */
1223 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1224 _mm_sll_epi16 (__m128i __A, __m128i __B) | |
1225 { | |
1226 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); | |
1227 } | |
1228 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v4si) to __builtin_ia32_pslld128 and return the result as
   __m128i.  Presumably a left shift of every 32-bit lane by the count
   held in __B -- confirm.  */
1229 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1230 _mm_sll_epi32 (__m128i __A, __m128i __B) | |
1231 { | |
1232 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); | |
1233 } | |
1234 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v2di) to __builtin_ia32_psllq128 and return the result as
   __m128i.  Presumably a left shift of every 64-bit lane by the count
   held in __B -- confirm.  */
1235 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1236 _mm_sll_epi64 (__m128i __A, __m128i __B) | |
1237 { | |
1238 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); | |
1239 } | |
1240 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v8hi) to __builtin_ia32_psraw128 and return the result as
   __m128i.  Presumably an arithmetic right shift of every 16-bit lane
   -- confirm.  */
1241 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1242 _mm_sra_epi16 (__m128i __A, __m128i __B) | |
1243 { | |
1244 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); | |
1245 } | |
1246 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v4si) to __builtin_ia32_psrad128 and return the result as
   __m128i.  Presumably an arithmetic right shift of every 32-bit lane
   -- confirm.  */
1247 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1248 _mm_sra_epi32 (__m128i __A, __m128i __B) | |
1249 { | |
1250 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); | |
1251 } | |
1252 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v8hi) to __builtin_ia32_psrlw128 and return the result as
   __m128i.  Presumably a logical right shift of every 16-bit lane --
   confirm.  */
1253 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1254 _mm_srl_epi16 (__m128i __A, __m128i __B) | |
1255 { | |
1256 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); | |
1257 } | |
1258 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v4si) to __builtin_ia32_psrld128 and return the result as
   __m128i.  Presumably a logical right shift of every 32-bit lane --
   confirm.  */
1259 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1260 _mm_srl_epi32 (__m128i __A, __m128i __B) | |
1261 { | |
1262 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); | |
1263 } | |
1264 | |
/* Vector-count form: pass __A and the count vector __B (both as
   __v2di) to __builtin_ia32_psrlq128 and return the result as
   __m128i.  Presumably a logical right shift of every 64-bit lane --
   confirm.  */
1265 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1266 _mm_srl_epi64 (__m128i __A, __m128i __B) | |
1267 { | |
1268 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); | |
1269 } | |
1270 | |
/* Bitwise AND of the two operands, written with the GNU vector `&'
   operator on their __v2du (two unsigned long long lanes) views.  */
1271 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1272 _mm_and_si128 (__m128i __A, __m128i __B) | |
1273 { | |
111 | 1274 return (__m128i) ((__v2du)__A & (__v2du)__B); |
0 | 1275 } |
1276 | |
/* Pass both operands (as __v2di) to __builtin_ia32_pandn128 and return
   the result as __m128i.  Unlike the neighbouring and/or/xor helpers
   this one goes through a builtin rather than a vector operator.
   NOTE(review): presumably computes (~__A) & __B, i.e. the FIRST
   operand is the one inverted -- confirm.  */
1277 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1278 _mm_andnot_si128 (__m128i __A, __m128i __B) | |
1279 { | |
1280 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); | |
1281 } | |
1282 | |
/* Bitwise OR of the two operands, written with the GNU vector `|'
   operator on their __v2du (two unsigned long long lanes) views.  */
1283 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1284 _mm_or_si128 (__m128i __A, __m128i __B) | |
1285 { | |
111 | 1286 return (__m128i) ((__v2du)__A | (__v2du)__B); |
0 | 1287 } |
1288 | |
/* Bitwise XOR of the two operands, written with the GNU vector `^'
   operator on their __v2du (two unsigned long long lanes) views.  */
1289 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1290 _mm_xor_si128 (__m128i __A, __m128i __B) | |
1291 { | |
111 | 1292 return (__m128i) ((__v2du)__A ^ (__v2du)__B); |
0 | 1293 } |
1294 | |
/* Lane-wise equality test, written with the GNU vector `==' operator on
   __v16qi views of both operands.  A GNU vector comparison yields
   all-ones in lanes where it holds and zero elsewhere.  */
1295 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1296 _mm_cmpeq_epi8 (__m128i __A, __m128i __B) | |
1297 { | |
111 | 1298 return (__m128i) ((__v16qi)__A == (__v16qi)__B); |
0 | 1299 } |
1300 | |
/* Lane-wise equality test, written with the GNU vector `==' operator on
   __v8hi views of both operands.  A GNU vector comparison yields
   all-ones in lanes where it holds and zero elsewhere.  */
1301 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1302 _mm_cmpeq_epi16 (__m128i __A, __m128i __B) | |
1303 { | |
111 | 1304 return (__m128i) ((__v8hi)__A == (__v8hi)__B); |
0 | 1305 } |
1306 | |
/* Lane-wise equality test, written with the GNU vector `==' operator on
   __v4si views of both operands.  A GNU vector comparison yields
   all-ones in lanes where it holds and zero elsewhere.  */
1307 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1308 _mm_cmpeq_epi32 (__m128i __A, __m128i __B) | |
1309 { | |
111 | 1310 return (__m128i) ((__v4si)__A == (__v4si)__B); |
0 | 1311 } |
1312 | |
/* Lane-wise __A < __B, written with the GNU vector `<' operator on
   __v16qi views; true lanes become all-ones, false lanes zero.
   NOTE(review): the ordering follows the signedness of __v16qi's
   element type, which is declared outside this excerpt.  If that type
   is plain `char', the result may change under -funsigned-char --
   confirm the typedef.  */
1313 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1314 _mm_cmplt_epi8 (__m128i __A, __m128i __B) | |
1315 { | |
111 | 1316 return (__m128i) ((__v16qi)__A < (__v16qi)__B); |
0 | 1317 } |
1318 | |
/* Lane-wise __A < __B, written with the GNU vector `<' operator on
   __v8hi views; true lanes become all-ones, false lanes zero.  The
   ordering follows the signedness of __v8hi's element type.  */
1319 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1320 _mm_cmplt_epi16 (__m128i __A, __m128i __B) | |
1321 { | |
111 | 1322 return (__m128i) ((__v8hi)__A < (__v8hi)__B); |
0 | 1323 } |
1324 | |
/* Lane-wise __A < __B, written with the GNU vector `<' operator on
   __v4si views; true lanes become all-ones, false lanes zero.  The
   ordering follows the signedness of __v4si's element type.  */
1325 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1326 _mm_cmplt_epi32 (__m128i __A, __m128i __B) | |
1327 { | |
111 | 1328 return (__m128i) ((__v4si)__A < (__v4si)__B); |
0 | 1329 } |
1330 | |
/* Lane-wise __A > __B, written with the GNU vector `>' operator on
   __v16qi views; true lanes become all-ones, false lanes zero.
   NOTE(review): the ordering follows the signedness of __v16qi's
   element type, which is declared outside this excerpt.  If that type
   is plain `char', the result may change under -funsigned-char --
   confirm the typedef.  */
1331 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1332 _mm_cmpgt_epi8 (__m128i __A, __m128i __B) | |
1333 { | |
111 | 1334 return (__m128i) ((__v16qi)__A > (__v16qi)__B); |
0 | 1335 } |
1336 | |
/* Lane-wise __A > __B, written with the GNU vector `>' operator on
   __v8hi views; true lanes become all-ones, false lanes zero.  The
   ordering follows the signedness of __v8hi's element type.  */
1337 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1338 _mm_cmpgt_epi16 (__m128i __A, __m128i __B) | |
1339 { | |
111 | 1340 return (__m128i) ((__v8hi)__A > (__v8hi)__B); |
0 | 1341 } |
1342 | |
/* Lane-wise __A > __B, written with the GNU vector `>' operator on
   __v4si views; true lanes become all-ones, false lanes zero.  The
   ordering follows the signedness of __v4si's element type.  */
1343 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1344 _mm_cmpgt_epi32 (__m128i __A, __m128i __B) | |
1345 { | |
111 | 1346 return (__m128i) ((__v4si)__A > (__v4si)__B); |
0 | 1347 } |
1348 | |
/* 16-bit lane extract and insert.  When __OPTIMIZE__ is defined these
   are inline functions; otherwise they are macros that cast their
   arguments and call the same builtins.

   _mm_extract_epi16 reads lane __N of __A (viewed as __v8hi) through
   __builtin_ia32_vec_ext_v8hi.  The value is cast to unsigned short
   before being returned as int, so the result is zero-extended rather
   than sign-extended.

   _mm_insert_epi16 passes __A (as __v8hi), the value __D and the lane
   index __N to __builtin_ia32_vec_set_v8hi and returns the result as
   __m128i.  Presumably a copy of __A with lane __N replaced by __D --
   confirm against the builtin's documentation.  */
1349 #ifdef __OPTIMIZE__ | |
1350 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1351 _mm_extract_epi16 (__m128i const __A, int const __N) | |
1352 { | |
67 | 1353 return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); |
0 | 1354 } |
1355 | |
1356 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1357 _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) | |
1358 { | |
1359 return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); | |
1360 } | |
1361 #else | |
1362 #define _mm_extract_epi16(A, N) \ | |
67 | 1363 ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) |
0 | 1364 #define _mm_insert_epi16(A, D, N) \ |
1365 ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ | |
1366 (int)(D), (int)(N))) | |
1367 #endif | |
1368 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pmaxsw128 and return
   the result as __m128i.  Presumably the lane-wise signed 16-bit
   maximum -- confirm.  */
1369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1370 _mm_max_epi16 (__m128i __A, __m128i __B) | |
1371 { | |
1372 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); | |
1373 } | |
1374 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_pmaxub128 and
   return the result as __m128i.  Presumably the lane-wise unsigned
   8-bit maximum -- confirm.  */
1375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1376 _mm_max_epu8 (__m128i __A, __m128i __B) | |
1377 { | |
1378 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); | |
1379 } | |
1380 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pminsw128 and return
   the result as __m128i.  Presumably the lane-wise signed 16-bit
   minimum -- confirm.  */
1381 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1382 _mm_min_epi16 (__m128i __A, __m128i __B) | |
1383 { | |
1384 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); | |
1385 } | |
1386 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_pminub128 and
   return the result as __m128i.  Presumably the lane-wise unsigned
   8-bit minimum -- confirm.  */
1387 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1388 _mm_min_epu8 (__m128i __A, __m128i __B) | |
1389 { | |
1390 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); | |
1391 } | |
1392 | |
/* Return the int produced by __builtin_ia32_pmovmskb128 for __A viewed
   as __v16qi.  Presumably a 16-bit mask built from the top bit of
   every byte -- confirm.  */
1393 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1394 _mm_movemask_epi8 (__m128i __A) | |
1395 { | |
1396 return __builtin_ia32_pmovmskb128 ((__v16qi)__A); | |
1397 } | |
1398 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pmulhuw128 and
   return the result as __m128i.  Presumably the high 16 bits of each
   unsigned 16x16 product -- confirm.  */
1399 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1400 _mm_mulhi_epu16 (__m128i __A, __m128i __B) | |
1401 { | |
1402 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); | |
1403 } | |
1404 | |
/* Integer shuffles driven by a selector.  _mm_shufflehi_epi16 and
   _mm_shufflelo_epi16 pass __A (as __v8hi) plus the selector to
   __builtin_ia32_pshufhw / __builtin_ia32_pshuflw; _mm_shuffle_epi32
   passes __A (as __v4si) plus the selector to __builtin_ia32_pshufd.
   When __OPTIMIZE__ is defined they are inline functions taking
   `const int __mask'; otherwise they are macros that pass N straight to
   the builtin.
   NOTE(review): the hi/lo variants presumably permute only the upper or
   lower four 16-bit lanes -- confirm against the intrinsics
   reference.  */
1405 #ifdef __OPTIMIZE__ | |
1406 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1407 _mm_shufflehi_epi16 (__m128i __A, const int __mask) | |
1408 { | |
1409 return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask); | |
1410 } | |
1411 | |
1412 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1413 _mm_shufflelo_epi16 (__m128i __A, const int __mask) | |
1414 { | |
1415 return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask); | |
1416 } | |
1417 | |
1418 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1419 _mm_shuffle_epi32 (__m128i __A, const int __mask) | |
1420 { | |
1421 return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); | |
1422 } | |
1423 #else | |
1424 #define _mm_shufflehi_epi16(A, N) \ | |
1425 ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N))) | |
1426 #define _mm_shufflelo_epi16(A, N) \ | |
1427 ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N))) | |
1428 #define _mm_shuffle_epi32(A, N) \ | |
1429 ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N))) | |
1430 #endif | |
1431 | |
/* Pass __A and __B (both as __v16qi) and the destination pointer __C to
   __builtin_ia32_maskmovdqu; nothing is returned.
   NOTE(review): presumably stores only those bytes of __A whose
   matching byte in __B has its top bit set -- confirm.  */
1432 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1433 _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) | |
1434 { | |
1435 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); | |
1436 } | |
1437 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_pavgb128 and return
   the result as __m128i.  Presumably the rounded average of unsigned
   8-bit lanes -- confirm.  */
1438 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1439 _mm_avg_epu8 (__m128i __A, __m128i __B) | |
1440 { | |
1441 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); | |
1442 } | |
1443 | |
/* Pass both operands (as __v8hi) to __builtin_ia32_pavgw128 and return
   the result as __m128i.  Presumably the rounded average of unsigned
   16-bit lanes -- confirm.  */
1444 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1445 _mm_avg_epu16 (__m128i __A, __m128i __B) | |
1446 { | |
1447 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); | |
1448 } | |
1449 | |
/* Pass both operands (as __v16qi) to __builtin_ia32_psadbw128 and
   return the result as __m128i.  Presumably sums of absolute byte
   differences, one sum per 64-bit half -- confirm.  */
1450 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1451 _mm_sad_epu8 (__m128i __A, __m128i __B) | |
1452 { | |
1453 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); | |
1454 } | |
1455 | |
/* Hand the destination pointer __A and the value __B to
   __builtin_ia32_movnti; nothing is returned.  Presumably a
   non-temporal (cache-bypassing) 32-bit store -- confirm.  */
1456 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1457 _mm_stream_si32 (int *__A, int __B) | |
1458 { | |
1459 __builtin_ia32_movnti (__A, __B); | |
1460 } | |
1461 | |
/* 64-bit-only: hand the destination pointer __A and the value __B to
   __builtin_ia32_movnti64; nothing is returned.  Presumably the 64-bit
   form of the non-temporal store -- confirm.  */
111 | 1462 #ifdef __x86_64__ |
1463 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1464 _mm_stream_si64 (long long int *__A, long long int __B) | |
1465 { | |
1466 __builtin_ia32_movnti64 (__A, __B); | |
1467 } | |
1468 #endif | |
1469 | |
/* Hand __A (cast to __v2di *) and __B (as __v2di) to
   __builtin_ia32_movntdq; nothing is returned.  Presumably a
   non-temporal 128-bit store -- confirm, including any alignment
   requirement on __A.  */
0 | 1470 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1471 _mm_stream_si128 (__m128i *__A, __m128i __B) | |
1472 { | |
1473 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); | |
1474 } | |
1475 | |
/* Hand the destination pointer __A and __B (as __v2df) to
   __builtin_ia32_movntpd; nothing is returned.  Presumably a
   non-temporal store of two doubles -- confirm, including any
   alignment requirement on __A.  */
1476 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1477 _mm_stream_pd (double *__A, __m128d __B) | |
1478 { | |
1479 __builtin_ia32_movntpd (__A, (__v2df)__B); | |
1480 } | |
1481 | |
/* Hand the address __A to __builtin_ia32_clflush; nothing is returned.
   Presumably flushes the cache line containing __A -- confirm.  */
1482 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1483 _mm_clflush (void const *__A) | |
1484 { | |
1485 __builtin_ia32_clflush (__A); | |
1486 } | |
1487 | |
/* Invoke __builtin_ia32_lfence with no arguments.  Presumably a load
   fence -- confirm.  */
1488 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1489 _mm_lfence (void) | |
1490 { | |
1491 __builtin_ia32_lfence (); | |
1492 } | |
1493 | |
/* Invoke __builtin_ia32_mfence with no arguments.  Presumably a full
   load-and-store fence -- confirm.  */
1494 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1495 _mm_mfence (void) | |
1496 { | |
1497 __builtin_ia32_mfence (); | |
1498 } | |
1499 | |
/* Build a __m128i from the scalar __A by calling _mm_set_epi32 with
   three zeros followed by __A.  Presumably the last argument is the
   lowest lane, leaving __A in lane 0 and zeros above it -- confirm
   against _mm_set_epi32's definition.  */
1500 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1501 _mm_cvtsi32_si128 (int __A) | |
1502 { | |
1503 return _mm_set_epi32 (0, 0, 0, __A); | |
1504 } | |
1505 | |
/* 64-bit-only pair: both spellings have identical bodies, building a
   __m128i by calling _mm_set_epi64x with zero followed by __A.
   Presumably that leaves __A in the low 64 bits and zero in the high
   64 bits -- confirm against _mm_set_epi64x's definition.  */
1506 #ifdef __x86_64__ | |
1507 /* Intel intrinsic. */ | |
1508 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1509 _mm_cvtsi64_si128 (long long __A) | |
1510 { | |
1511 return _mm_set_epi64x (0, __A); | |
1512 } | |
1513 | |
1514 /* Microsoft intrinsic. */ | |
1515 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1516 _mm_cvtsi64x_si128 (long long __A) | |
1517 { | |
1518 return _mm_set_epi64x (0, __A); | |
1519 } | |
1520 #endif | |
1521 | |
1522 /* Casts between various SP, DP, INT vector types. Note that these do no | |
1523 conversion of values, they just change the type. */ | |
/* Reinterpret a __m128d as a __m128 with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1524 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1525 _mm_castpd_ps(__m128d __A) | |
1526 { | |
1527 return (__m128) __A; | |
1528 } | |
1529 | |
/* Reinterpret a __m128d as a __m128i with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1530 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1531 _mm_castpd_si128(__m128d __A) | |
1532 { | |
1533 return (__m128i) __A; | |
1534 } | |
1535 | |
/* Reinterpret a __m128 as a __m128d with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1536 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1537 _mm_castps_pd(__m128 __A) | |
1538 { | |
1539 return (__m128d) __A; | |
1540 } | |
1541 | |
/* Reinterpret a __m128 as a __m128i with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1542 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1543 _mm_castps_si128(__m128 __A) | |
1544 { | |
1545 return (__m128i) __A; | |
1546 } | |
1547 | |
/* Reinterpret a __m128i as a __m128 with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1548 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1549 _mm_castsi128_ps(__m128i __A) | |
1550 { | |
1551 return (__m128) __A; | |
1552 } | |
1553 | |
/* Reinterpret a __m128i as a __m128d with a plain vector cast; the bits
   are unchanged and no value conversion takes place.  */
1554 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1555 _mm_castsi128_pd(__m128i __A) | |
1556 { | |
1557 return (__m128d) __A; | |
1558 } | |
1559 | |
/* Epilogue matching the prologue at the top of this header.  If
   __SSE2__ was not defined there, the header pushed the option state,
   forced target("sse2") and defined __DISABLE_SSE2__.  This drops the
   marker and pops the options so the including translation unit gets
   its original target settings back.  */
111 | 1560 #ifdef __DISABLE_SSE2__ |
1561 #undef __DISABLE_SSE2__ | |
1562 #pragma GCC pop_options | |
1563 #endif /* __DISABLE_SSE2__ */ | |
0 | 1564 |
1565 #endif /* _EMMINTRIN_H_INCLUDED */ |