145
|
1 /* Copyright (C) 2002-2020 Free Software Foundation, Inc.
|
0
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify
|
|
6 it under the terms of the GNU General Public License as published by
|
|
7 the Free Software Foundation; either version 3, or (at your option)
|
|
8 any later version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful,
|
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 GNU General Public License for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
24 /* Implemented from the specification included in the Intel C++ Compiler
|
|
25 User Guide and Reference, version 9.0. */
|
|
26
|
|
27 #ifndef _MMINTRIN_H_INCLUDED
|
|
28 #define _MMINTRIN_H_INCLUDED
|
|
29
|
111
|
/* Switch on the target options required by the definitions that follow
   when __MMX__ is not defined, or when building for x86-64 without
   __SSE__.  The current options are saved with push_options, and
   __DISABLE_MMX__ records that they were changed here.  */
#if (defined (__x86_64__) && !defined (__SSE__)) || !defined (__MMX__)
#pragma GCC push_options
#if defined (__MMX_WITH_SSE__)
#pragma GCC target("sse2")
#elif defined (__x86_64__)
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
#define __DISABLE_MMX__
#endif /* __MMX__ */
|
|
41
|
0
|
/* Public 8-byte vector type.  It is marked __may_alias__ because the
   Intel API allows it to alias other vector types and their scalar
   components.  */
typedef int __m64
  __attribute__ ((__vector_size__ (8), __may_alias__));
|
|
45
|
111
|
/* Same layout as __m64, but declared with an alignment of 1 so that it
   can describe unaligned objects.  */
typedef int __m64_u
  __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
|
|
48
|
0
|
/* Element-typed 8-byte vectors used only inside the intrinsic bodies:
   two ints, four shorts, eight chars, one long long and two floats.  */
typedef int       __v2si __attribute__ ((__vector_size__ (8)));
typedef short     __v4hi __attribute__ ((__vector_size__ (8)));
typedef char      __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float     __v2sf __attribute__ ((__vector_size__ (8)));
|
|
55
|
|
/* Empty the multimedia state by issuing the EMMS builtin.  Takes no
   arguments and returns nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
|
|
62
|
|
/* Alternate name for _mm_empty; simply forwards to it.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}
|
|
68
|
|
69 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
|
|
70 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
71 _mm_cvtsi32_si64 (int __i)
|
|
72 {
|
|
73 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
|
|
74 }
|
|
75
|
|
76 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
77 _m_from_int (int __i)
|
|
78 {
|
|
79 return _mm_cvtsi32_si64 (__i);
|
|
80 }
|
|
81
|
|
82 #ifdef __x86_64__
|
|
83 /* Convert I to a __m64 object. */
|
|
84
|
|
85 /* Intel intrinsic. */
|
|
86 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
87 _m_from_int64 (long long __i)
|
|
88 {
|
|
89 return (__m64) __i;
|
|
90 }
|
|
91
|
|
92 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
93 _mm_cvtsi64_m64 (long long __i)
|
|
94 {
|
|
95 return (__m64) __i;
|
|
96 }
|
|
97
|
|
98 /* Microsoft intrinsic. */
|
|
99 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
100 _mm_cvtsi64x_si64 (long long __i)
|
|
101 {
|
|
102 return (__m64) __i;
|
|
103 }
|
|
104
|
|
105 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
106 _mm_set_pi64x (long long __i)
|
|
107 {
|
|
108 return (__m64) __i;
|
|
109 }
|
|
110 #endif
|
|
111
|
|
112 /* Convert the lower 32 bits of the __m64 object into an integer. */
|
|
113 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
114 _mm_cvtsi64_si32 (__m64 __i)
|
|
115 {
|
|
116 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
|
|
117 }
|
|
118
|
|
119 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
120 _m_to_int (__m64 __i)
|
|
121 {
|
|
122 return _mm_cvtsi64_si32 (__i);
|
|
123 }
|
|
124
|
|
125 #ifdef __x86_64__
|
|
126 /* Convert the __m64 object to a 64bit integer. */
|
|
127
|
|
128 /* Intel intrinsic. */
|
|
129 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
130 _m_to_int64 (__m64 __i)
|
|
131 {
|
|
132 return (long long)__i;
|
|
133 }
|
|
134
|
|
135 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
136 _mm_cvtm64_si64 (__m64 __i)
|
|
137 {
|
|
138 return (long long)__i;
|
|
139 }
|
|
140
|
|
141 /* Microsoft intrinsic. */
|
|
142 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
143 _mm_cvtsi64_si64x (__m64 __i)
|
|
144 {
|
|
145 return (long long)__i;
|
|
146 }
|
|
147 #endif
|
|
148
|
|
149 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
|
150 the result, and the four 16-bit values from M2 into the upper four 8-bit
|
|
151 values of the result, all with signed saturation. */
|
|
152 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
153 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
|
|
154 {
|
|
155 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
|
|
156 }
|
|
157
|
|
158 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
159 _m_packsswb (__m64 __m1, __m64 __m2)
|
|
160 {
|
|
161 return _mm_packs_pi16 (__m1, __m2);
|
|
162 }
|
|
163
|
|
164 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
|
|
165 the result, and the two 32-bit values from M2 into the upper two 16-bit
|
|
166 values of the result, all with signed saturation. */
|
|
167 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
168 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
|
|
169 {
|
|
170 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
|
|
171 }
|
|
172
|
|
173 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
174 _m_packssdw (__m64 __m1, __m64 __m2)
|
|
175 {
|
|
176 return _mm_packs_pi32 (__m1, __m2);
|
|
177 }
|
|
178
|
|
179 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
|
180 the result, and the four 16-bit values from M2 into the upper four 8-bit
|
|
181 values of the result, all with unsigned saturation. */
|
|
182 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
183 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
|
|
184 {
|
|
185 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
|
|
186 }
|
|
187
|
|
188 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
189 _m_packuswb (__m64 __m1, __m64 __m2)
|
|
190 {
|
|
191 return _mm_packs_pu16 (__m1, __m2);
|
|
192 }
|
|
193
|
|
194 /* Interleave the four 8-bit values from the high half of M1 with the four
|
|
195 8-bit values from the high half of M2. */
|
|
196 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
197 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
|
|
198 {
|
|
199 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
|
|
200 }
|
|
201
|
|
202 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
203 _m_punpckhbw (__m64 __m1, __m64 __m2)
|
|
204 {
|
|
205 return _mm_unpackhi_pi8 (__m1, __m2);
|
|
206 }
|
|
207
|
|
208 /* Interleave the two 16-bit values from the high half of M1 with the two
|
|
209 16-bit values from the high half of M2. */
|
|
210 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
211 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
|
|
212 {
|
|
213 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
214 }
|
|
215
|
|
216 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
217 _m_punpckhwd (__m64 __m1, __m64 __m2)
|
|
218 {
|
|
219 return _mm_unpackhi_pi16 (__m1, __m2);
|
|
220 }
|
|
221
|
|
222 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
|
|
223 value from the high half of M2. */
|
|
224 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
225 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
|
|
226 {
|
|
227 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
|
|
228 }
|
|
229
|
|
230 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
231 _m_punpckhdq (__m64 __m1, __m64 __m2)
|
|
232 {
|
|
233 return _mm_unpackhi_pi32 (__m1, __m2);
|
|
234 }
|
|
235
|
|
236 /* Interleave the four 8-bit values from the low half of M1 with the four
|
|
237 8-bit values from the low half of M2. */
|
|
238 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
239 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
|
|
240 {
|
|
241 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
|
|
242 }
|
|
243
|
|
244 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
245 _m_punpcklbw (__m64 __m1, __m64 __m2)
|
|
246 {
|
|
247 return _mm_unpacklo_pi8 (__m1, __m2);
|
|
248 }
|
|
249
|
|
250 /* Interleave the two 16-bit values from the low half of M1 with the two
|
|
251 16-bit values from the low half of M2. */
|
|
252 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
253 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
|
|
254 {
|
|
255 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
256 }
|
|
257
|
|
258 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
259 _m_punpcklwd (__m64 __m1, __m64 __m2)
|
|
260 {
|
|
261 return _mm_unpacklo_pi16 (__m1, __m2);
|
|
262 }
|
|
263
|
|
264 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
|
|
265 value from the low half of M2. */
|
|
266 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
267 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
|
|
268 {
|
|
269 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
|
|
270 }
|
|
271
|
|
272 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
273 _m_punpckldq (__m64 __m1, __m64 __m2)
|
|
274 {
|
|
275 return _mm_unpacklo_pi32 (__m1, __m2);
|
|
276 }
|
|
277
|
|
278 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
|
|
279 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
280 _mm_add_pi8 (__m64 __m1, __m64 __m2)
|
|
281 {
|
|
282 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
|
|
283 }
|
|
284
|
|
285 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
286 _m_paddb (__m64 __m1, __m64 __m2)
|
|
287 {
|
|
288 return _mm_add_pi8 (__m1, __m2);
|
|
289 }
|
|
290
|
|
291 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
|
|
292 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
293 _mm_add_pi16 (__m64 __m1, __m64 __m2)
|
|
294 {
|
|
295 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
|
|
296 }
|
|
297
|
|
298 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
299 _m_paddw (__m64 __m1, __m64 __m2)
|
|
300 {
|
|
301 return _mm_add_pi16 (__m1, __m2);
|
|
302 }
|
|
303
|
|
304 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
|
|
305 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
306 _mm_add_pi32 (__m64 __m1, __m64 __m2)
|
|
307 {
|
|
308 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
|
|
309 }
|
|
310
|
|
311 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
312 _m_paddd (__m64 __m1, __m64 __m2)
|
|
313 {
|
|
314 return _mm_add_pi32 (__m1, __m2);
|
|
315 }
|
|
316
|
|
317 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
|
111
|
318 #ifndef __SSE2__
|
|
319 #pragma GCC push_options
|
145
|
320 #ifdef __MMX_WITH_SSE__
|
|
321 #pragma GCC target("sse2")
|
|
322 #else
|
111
|
323 #pragma GCC target("sse2,mmx")
|
145
|
324 #endif
|
111
|
325 #define __DISABLE_SSE2__
|
|
326 #endif /* __SSE2__ */
|
|
327
|
0
|
328 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
329 _mm_add_si64 (__m64 __m1, __m64 __m2)
|
|
330 {
|
|
331 return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
|
|
332 }
|
111
|
333 #ifdef __DISABLE_SSE2__
|
|
334 #undef __DISABLE_SSE2__
|
|
335 #pragma GCC pop_options
|
|
336 #endif /* __DISABLE_SSE2__ */
|
0
|
337
|
|
338 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
|
|
339 saturated arithmetic. */
|
|
340 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
341 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
|
|
342 {
|
|
343 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
|
|
344 }
|
|
345
|
|
346 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
347 _m_paddsb (__m64 __m1, __m64 __m2)
|
|
348 {
|
|
349 return _mm_adds_pi8 (__m1, __m2);
|
|
350 }
|
|
351
|
|
352 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
|
|
353 saturated arithmetic. */
|
|
354 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
355 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
|
|
356 {
|
|
357 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
|
|
358 }
|
|
359
|
|
360 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
361 _m_paddsw (__m64 __m1, __m64 __m2)
|
|
362 {
|
|
363 return _mm_adds_pi16 (__m1, __m2);
|
|
364 }
|
|
365
|
|
366 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
|
|
367 saturated arithmetic. */
|
|
368 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
369 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
|
|
370 {
|
|
371 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
|
|
372 }
|
|
373
|
|
374 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
375 _m_paddusb (__m64 __m1, __m64 __m2)
|
|
376 {
|
|
377 return _mm_adds_pu8 (__m1, __m2);
|
|
378 }
|
|
379
|
|
380 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
|
|
381 saturated arithmetic. */
|
|
382 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
383 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
|
|
384 {
|
|
385 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
|
|
386 }
|
|
387
|
|
388 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
389 _m_paddusw (__m64 __m1, __m64 __m2)
|
|
390 {
|
|
391 return _mm_adds_pu16 (__m1, __m2);
|
|
392 }
|
|
393
|
|
394 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
|
|
395 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
396 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
|
|
397 {
|
|
398 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
|
|
399 }
|
|
400
|
|
401 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
402 _m_psubb (__m64 __m1, __m64 __m2)
|
|
403 {
|
|
404 return _mm_sub_pi8 (__m1, __m2);
|
|
405 }
|
|
406
|
|
407 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
|
|
408 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
409 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
|
|
410 {
|
|
411 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
|
|
412 }
|
|
413
|
|
414 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
415 _m_psubw (__m64 __m1, __m64 __m2)
|
|
416 {
|
|
417 return _mm_sub_pi16 (__m1, __m2);
|
|
418 }
|
|
419
|
|
420 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
|
|
421 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
422 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
|
|
423 {
|
|
424 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
|
|
425 }
|
|
426
|
|
427 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
428 _m_psubd (__m64 __m1, __m64 __m2)
|
|
429 {
|
|
430 return _mm_sub_pi32 (__m1, __m2);
|
|
431 }
|
|
432
|
|
433 /* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
|
111
|
434 #ifndef __SSE2__
|
|
435 #pragma GCC push_options
|
145
|
436 #ifdef __MMX_WITH_SSE__
|
|
437 #pragma GCC target("sse2")
|
|
438 #else
|
111
|
439 #pragma GCC target("sse2,mmx")
|
145
|
440 #endif
|
111
|
441 #define __DISABLE_SSE2__
|
|
442 #endif /* __SSE2__ */
|
|
443
|
0
|
444 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
445 _mm_sub_si64 (__m64 __m1, __m64 __m2)
|
|
446 {
|
|
447 return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
|
|
448 }
|
111
|
449 #ifdef __DISABLE_SSE2__
|
|
450 #undef __DISABLE_SSE2__
|
|
451 #pragma GCC pop_options
|
|
452 #endif /* __DISABLE_SSE2__ */
|
0
|
453
|
|
454 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
|
|
455 saturating arithmetic. */
|
|
456 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
457 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
|
|
458 {
|
|
459 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
|
|
460 }
|
|
461
|
|
462 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
463 _m_psubsb (__m64 __m1, __m64 __m2)
|
|
464 {
|
|
465 return _mm_subs_pi8 (__m1, __m2);
|
|
466 }
|
|
467
|
|
468 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
|
469 signed saturating arithmetic. */
|
|
470 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
471 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
|
|
472 {
|
|
473 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
|
|
474 }
|
|
475
|
|
476 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
477 _m_psubsw (__m64 __m1, __m64 __m2)
|
|
478 {
|
|
479 return _mm_subs_pi16 (__m1, __m2);
|
|
480 }
|
|
481
|
|
482 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
|
|
483 unsigned saturating arithmetic. */
|
|
484 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
485 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
|
|
486 {
|
|
487 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
|
|
488 }
|
|
489
|
|
490 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
491 _m_psubusb (__m64 __m1, __m64 __m2)
|
|
492 {
|
|
493 return _mm_subs_pu8 (__m1, __m2);
|
|
494 }
|
|
495
|
|
496 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
|
497 unsigned saturating arithmetic. */
|
|
498 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
499 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
|
|
500 {
|
|
501 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
|
|
502 }
|
|
503
|
|
504 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
505 _m_psubusw (__m64 __m1, __m64 __m2)
|
|
506 {
|
|
507 return _mm_subs_pu16 (__m1, __m2);
|
|
508 }
|
|
509
|
|
510 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
|
|
511 four 32-bit intermediate results, which are then summed by pairs to
|
|
512 produce two 32-bit results. */
|
|
513 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
514 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
|
|
515 {
|
|
516 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
517 }
|
|
518
|
|
519 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
520 _m_pmaddwd (__m64 __m1, __m64 __m2)
|
|
521 {
|
|
522 return _mm_madd_pi16 (__m1, __m2);
|
|
523 }
|
|
524
|
|
525 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
|
|
526 M2 and produce the high 16 bits of the 32-bit results. */
|
|
527 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
528 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
|
|
529 {
|
|
530 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
|
|
531 }
|
|
532
|
|
533 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
534 _m_pmulhw (__m64 __m1, __m64 __m2)
|
|
535 {
|
|
536 return _mm_mulhi_pi16 (__m1, __m2);
|
|
537 }
|
|
538
|
|
539 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
|
|
540 the low 16 bits of the results. */
|
|
541 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
542 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
|
|
543 {
|
|
544 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
|
|
545 }
|
|
546
|
|
547 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
548 _m_pmullw (__m64 __m1, __m64 __m2)
|
|
549 {
|
|
550 return _mm_mullo_pi16 (__m1, __m2);
|
|
551 }
|
|
552
|
|
553 /* Shift four 16-bit values in M left by COUNT. */
|
|
554 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
555 _mm_sll_pi16 (__m64 __m, __m64 __count)
|
|
556 {
|
|
557 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
|
|
558 }
|
|
559
|
|
560 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
561 _m_psllw (__m64 __m, __m64 __count)
|
|
562 {
|
|
563 return _mm_sll_pi16 (__m, __count);
|
|
564 }
|
|
565
|
|
566 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
567 _mm_slli_pi16 (__m64 __m, int __count)
|
|
568 {
|
|
569 return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
|
|
570 }
|
|
571
|
|
572 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
573 _m_psllwi (__m64 __m, int __count)
|
|
574 {
|
|
575 return _mm_slli_pi16 (__m, __count);
|
|
576 }
|
|
577
|
|
578 /* Shift two 32-bit values in M left by COUNT. */
|
|
579 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
580 _mm_sll_pi32 (__m64 __m, __m64 __count)
|
|
581 {
|
|
582 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
|
|
583 }
|
|
584
|
|
585 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
586 _m_pslld (__m64 __m, __m64 __count)
|
|
587 {
|
|
588 return _mm_sll_pi32 (__m, __count);
|
|
589 }
|
|
590
|
|
591 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
592 _mm_slli_pi32 (__m64 __m, int __count)
|
|
593 {
|
|
594 return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
|
|
595 }
|
|
596
|
|
597 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
598 _m_pslldi (__m64 __m, int __count)
|
|
599 {
|
|
600 return _mm_slli_pi32 (__m, __count);
|
|
601 }
|
|
602
|
|
603 /* Shift the 64-bit value in M left by COUNT. */
|
|
604 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
605 _mm_sll_si64 (__m64 __m, __m64 __count)
|
|
606 {
|
|
607 return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
|
|
608 }
|
|
609
|
|
610 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
611 _m_psllq (__m64 __m, __m64 __count)
|
|
612 {
|
|
613 return _mm_sll_si64 (__m, __count);
|
|
614 }
|
|
615
|
|
616 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
617 _mm_slli_si64 (__m64 __m, int __count)
|
|
618 {
|
|
619 return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
|
|
620 }
|
|
621
|
|
622 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
623 _m_psllqi (__m64 __m, int __count)
|
|
624 {
|
|
625 return _mm_slli_si64 (__m, __count);
|
|
626 }
|
|
627
|
|
628 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
|
|
629 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
630 _mm_sra_pi16 (__m64 __m, __m64 __count)
|
|
631 {
|
|
632 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
|
|
633 }
|
|
634
|
|
635 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
636 _m_psraw (__m64 __m, __m64 __count)
|
|
637 {
|
|
638 return _mm_sra_pi16 (__m, __count);
|
|
639 }
|
|
640
|
|
641 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
642 _mm_srai_pi16 (__m64 __m, int __count)
|
|
643 {
|
|
644 return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
|
|
645 }
|
|
646
|
|
647 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
648 _m_psrawi (__m64 __m, int __count)
|
|
649 {
|
|
650 return _mm_srai_pi16 (__m, __count);
|
|
651 }
|
|
652
|
|
653 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
|
|
654 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
655 _mm_sra_pi32 (__m64 __m, __m64 __count)
|
|
656 {
|
|
657 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
|
|
658 }
|
|
659
|
|
660 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
661 _m_psrad (__m64 __m, __m64 __count)
|
|
662 {
|
|
663 return _mm_sra_pi32 (__m, __count);
|
|
664 }
|
|
665
|
|
666 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
667 _mm_srai_pi32 (__m64 __m, int __count)
|
|
668 {
|
|
669 return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
|
|
670 }
|
|
671
|
|
672 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
673 _m_psradi (__m64 __m, int __count)
|
|
674 {
|
|
675 return _mm_srai_pi32 (__m, __count);
|
|
676 }
|
|
677
|
|
678 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
|
|
679 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
680 _mm_srl_pi16 (__m64 __m, __m64 __count)
|
|
681 {
|
|
682 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
|
|
683 }
|
|
684
|
|
685 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
686 _m_psrlw (__m64 __m, __m64 __count)
|
|
687 {
|
|
688 return _mm_srl_pi16 (__m, __count);
|
|
689 }
|
|
690
|
|
691 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
692 _mm_srli_pi16 (__m64 __m, int __count)
|
|
693 {
|
|
694 return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
|
|
695 }
|
|
696
|
|
697 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
698 _m_psrlwi (__m64 __m, int __count)
|
|
699 {
|
|
700 return _mm_srli_pi16 (__m, __count);
|
|
701 }
|
|
702
|
|
703 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
|
|
704 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
705 _mm_srl_pi32 (__m64 __m, __m64 __count)
|
|
706 {
|
|
707 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
|
|
708 }
|
|
709
|
|
710 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
711 _m_psrld (__m64 __m, __m64 __count)
|
|
712 {
|
|
713 return _mm_srl_pi32 (__m, __count);
|
|
714 }
|
|
715
|
|
716 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
717 _mm_srli_pi32 (__m64 __m, int __count)
|
|
718 {
|
|
719 return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
|
|
720 }
|
|
721
|
|
722 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
723 _m_psrldi (__m64 __m, int __count)
|
|
724 {
|
|
725 return _mm_srli_pi32 (__m, __count);
|
|
726 }
|
|
727
|
|
728 /* Shift the 64-bit value in M right by COUNT; shift in zeros. */
|
|
729 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
730 _mm_srl_si64 (__m64 __m, __m64 __count)
|
|
731 {
|
|
732 return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
|
|
733 }
|
|
734
|
|
735 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
736 _m_psrlq (__m64 __m, __m64 __count)
|
|
737 {
|
|
738 return _mm_srl_si64 (__m, __count);
|
|
739 }
|
|
740
|
|
741 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
742 _mm_srli_si64 (__m64 __m, int __count)
|
|
743 {
|
|
744 return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
|
|
745 }
|
|
746
|
|
747 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
748 _m_psrlqi (__m64 __m, int __count)
|
|
749 {
|
|
750 return _mm_srli_si64 (__m, __count);
|
|
751 }
|
|
752
|
|
753 /* Bit-wise AND the 64-bit values in M1 and M2. */
|
|
754 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
755 _mm_and_si64 (__m64 __m1, __m64 __m2)
|
|
756 {
|
|
757 return __builtin_ia32_pand (__m1, __m2);
|
|
758 }
|
|
759
|
|
760 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
761 _m_pand (__m64 __m1, __m64 __m2)
|
|
762 {
|
|
763 return _mm_and_si64 (__m1, __m2);
|
|
764 }
|
|
765
|
|
766 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
|
|
767 64-bit value in M2. */
|
|
768 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
769 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
|
|
770 {
|
|
771 return __builtin_ia32_pandn (__m1, __m2);
|
|
772 }
|
|
773
|
|
774 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
775 _m_pandn (__m64 __m1, __m64 __m2)
|
|
776 {
|
|
777 return _mm_andnot_si64 (__m1, __m2);
|
|
778 }
|
|
779
|
|
780 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
|
|
781 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
782 _mm_or_si64 (__m64 __m1, __m64 __m2)
|
|
783 {
|
|
784 return __builtin_ia32_por (__m1, __m2);
|
|
785 }
|
|
786
|
|
787 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
788 _m_por (__m64 __m1, __m64 __m2)
|
|
789 {
|
|
790 return _mm_or_si64 (__m1, __m2);
|
|
791 }
|
|
792
|
|
793 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
|
|
794 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
795 _mm_xor_si64 (__m64 __m1, __m64 __m2)
|
|
796 {
|
|
797 return __builtin_ia32_pxor (__m1, __m2);
|
|
798 }
|
|
799
|
|
800 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
801 _m_pxor (__m64 __m1, __m64 __m2)
|
|
802 {
|
|
803 return _mm_xor_si64 (__m1, __m2);
|
|
804 }
|
|
805
|
|
806 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
|
|
807 test is true and zero if false. */
|
|
808 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
809 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
|
|
810 {
|
|
811 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
|
|
812 }
|
|
813
|
|
814 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
815 _m_pcmpeqb (__m64 __m1, __m64 __m2)
|
|
816 {
|
|
817 return _mm_cmpeq_pi8 (__m1, __m2);
|
|
818 }
|
|
819
|
|
820 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
821 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
|
|
822 {
|
|
823 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
|
|
824 }
|
|
825
|
|
826 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
827 _m_pcmpgtb (__m64 __m1, __m64 __m2)
|
|
828 {
|
|
829 return _mm_cmpgt_pi8 (__m1, __m2);
|
|
830 }
|
|
831
|
|
832 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
|
|
833 the test is true and zero if false. */
|
|
834 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
835 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
|
|
836 {
|
|
837 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
|
|
838 }
|
|
839
|
|
840 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
841 _m_pcmpeqw (__m64 __m1, __m64 __m2)
|
|
842 {
|
|
843 return _mm_cmpeq_pi16 (__m1, __m2);
|
|
844 }
|
|
845
|
|
846 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
847 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
|
|
848 {
|
|
849 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
|
|
850 }
|
|
851
|
|
852 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
853 _m_pcmpgtw (__m64 __m1, __m64 __m2)
|
|
854 {
|
|
855 return _mm_cmpgt_pi16 (__m1, __m2);
|
|
856 }
|
|
857
|
|
858 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
|
|
859 the test is true and zero if false. */
|
|
860 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
861 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
|
|
862 {
|
|
863 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
|
|
864 }
|
|
865
|
|
866 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
867 _m_pcmpeqd (__m64 __m1, __m64 __m2)
|
|
868 {
|
|
869 return _mm_cmpeq_pi32 (__m1, __m2);
|
|
870 }
|
|
871
|
|
872 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
873 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
|
|
874 {
|
|
875 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
|
|
876 }
|
|
877
|
|
878 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
879 _m_pcmpgtd (__m64 __m1, __m64 __m2)
|
|
880 {
|
|
881 return _mm_cmpgt_pi32 (__m1, __m2);
|
|
882 }
|
|
883
|
|
884 /* Creates a 64-bit zero. */
|
|
885 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
886 _mm_setzero_si64 (void)
|
|
887 {
|
|
888 return (__m64)0LL;
|
|
889 }
|
|
890
|
|
891 /* Creates a vector of two 32-bit values; I0 is least significant. */
|
|
892 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
893 _mm_set_pi32 (int __i1, int __i0)
|
|
894 {
|
|
895 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
|
|
896 }
|
|
897
|
|
898 /* Creates a vector of four 16-bit values; W0 is least significant. */
|
|
899 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
900 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
|
|
901 {
|
|
902 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
|
|
903 }
|
|
904
|
|
905 /* Creates a vector of eight 8-bit values; B0 is least significant. */
|
|
906 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
907 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
|
|
908 char __b3, char __b2, char __b1, char __b0)
|
|
909 {
|
|
910 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
|
|
911 __b4, __b5, __b6, __b7);
|
|
912 }
|
|
913
|
|
914 /* Similar, but with the arguments in reverse order. */
|
|
915 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
916 _mm_setr_pi32 (int __i0, int __i1)
|
|
917 {
|
|
918 return _mm_set_pi32 (__i1, __i0);
|
|
919 }
|
|
920
|
|
921 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
922 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
|
|
923 {
|
|
924 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
|
|
925 }
|
|
926
|
|
927 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
928 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
|
|
929 char __b4, char __b5, char __b6, char __b7)
|
|
930 {
|
|
931 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
|
932 }
|
|
933
|
|
934 /* Creates a vector of two 32-bit values, both elements containing I. */
|
|
935 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
936 _mm_set1_pi32 (int __i)
|
|
937 {
|
|
938 return _mm_set_pi32 (__i, __i);
|
|
939 }
|
|
940
|
|
941 /* Creates a vector of four 16-bit values, all elements containing W. */
|
|
942 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
943 _mm_set1_pi16 (short __w)
|
|
944 {
|
|
945 return _mm_set_pi16 (__w, __w, __w, __w);
|
|
946 }
|
|
947
|
|
948 /* Creates a vector of eight 8-bit values, all elements containing B. */
|
|
949 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
950 _mm_set1_pi8 (char __b)
|
|
951 {
|
|
952 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
|
|
953 }
|
111
|
/* If the top of this header pushed temporary target options (marked by
   __DISABLE_MMX__), restore the caller's options and drop the marker.  */
#if defined __DISABLE_MMX__
#undef __DISABLE_MMX__
#pragma GCC pop_options
#endif /* __DISABLE_MMX__ */
|
0
|
958
|
|
959 #endif /* _MMINTRIN_H_INCLUDED */
|