0
|
1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
|
2 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation; either version 3, or (at your option)
|
|
9 any later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 Under Section 7 of GPL version 3, you are granted additional
|
|
17 permissions described in the GCC Runtime Library Exception, version
|
|
18 3.1, as published by the Free Software Foundation.
|
|
19
|
|
20 You should have received a copy of the GNU General Public License and
|
|
21 a copy of the GCC Runtime Library Exception along with this program;
|
|
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
23 <http://www.gnu.org/licenses/>. */
|
|
24
|
|
25 /* Implemented from the specification included in the Intel C++ Compiler
|
|
26 User Guide and Reference, version 9.0. */
|
|
27
|
|
28 #ifndef _MMINTRIN_H_INCLUDED
|
|
29 #define _MMINTRIN_H_INCLUDED
|
|
30
|
|
31 #ifndef __MMX__
|
|
32 # error "MMX instruction set not enabled"
|
|
33 #else
|
|
34 /* The Intel API is flexible enough that we must allow aliasing with other
|
|
35 vector types, and their scalar components. */
|
|
36 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
|
|
37
|
|
38 /* Internal data types for implementing the intrinsics. */
|
|
39 typedef int __v2si __attribute__ ((__vector_size__ (8)));
|
|
40 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
|
|
41 typedef char __v8qi __attribute__ ((__vector_size__ (8)));
|
|
42 typedef long long __v1di __attribute__ ((__vector_size__ (8)));
|
|
43 typedef float __v2sf __attribute__ ((__vector_size__ (8)));
|
|
44
|
|
45 /* Empty the multimedia state. */
|
|
46 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
47 _mm_empty (void)
|
|
48 {
|
|
49 __builtin_ia32_emms ();
|
|
50 }
|
|
51
|
|
52 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
53 _m_empty (void)
|
|
54 {
|
|
55 _mm_empty ();
|
|
56 }
|
|
57
|
|
58 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
|
|
59 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
60 _mm_cvtsi32_si64 (int __i)
|
|
61 {
|
|
62 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
|
|
63 }
|
|
64
|
|
65 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
66 _m_from_int (int __i)
|
|
67 {
|
|
68 return _mm_cvtsi32_si64 (__i);
|
|
69 }
|
|
70
|
|
71 #ifdef __x86_64__
|
|
72 /* Convert I to a __m64 object. */
|
|
73
|
|
74 /* Intel intrinsic. */
|
|
75 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
76 _m_from_int64 (long long __i)
|
|
77 {
|
|
78 return (__m64) __i;
|
|
79 }
|
|
80
|
|
81 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
82 _mm_cvtsi64_m64 (long long __i)
|
|
83 {
|
|
84 return (__m64) __i;
|
|
85 }
|
|
86
|
|
87 /* Microsoft intrinsic. */
|
|
88 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
89 _mm_cvtsi64x_si64 (long long __i)
|
|
90 {
|
|
91 return (__m64) __i;
|
|
92 }
|
|
93
|
|
94 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
95 _mm_set_pi64x (long long __i)
|
|
96 {
|
|
97 return (__m64) __i;
|
|
98 }
|
|
99 #endif
|
|
100
|
|
101 /* Convert the lower 32 bits of the __m64 object into an integer. */
|
|
102 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
103 _mm_cvtsi64_si32 (__m64 __i)
|
|
104 {
|
|
105 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
|
|
106 }
|
|
107
|
|
108 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
109 _m_to_int (__m64 __i)
|
|
110 {
|
|
111 return _mm_cvtsi64_si32 (__i);
|
|
112 }
|
|
113
|
|
114 #ifdef __x86_64__
|
|
115 /* Convert the __m64 object to a 64bit integer. */
|
|
116
|
|
117 /* Intel intrinsic. */
|
|
118 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
119 _m_to_int64 (__m64 __i)
|
|
120 {
|
|
121 return (long long)__i;
|
|
122 }
|
|
123
|
|
124 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
125 _mm_cvtm64_si64 (__m64 __i)
|
|
126 {
|
|
127 return (long long)__i;
|
|
128 }
|
|
129
|
|
130 /* Microsoft intrinsic. */
|
|
131 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
132 _mm_cvtsi64_si64x (__m64 __i)
|
|
133 {
|
|
134 return (long long)__i;
|
|
135 }
|
|
136 #endif
|
|
137
|
|
138 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
|
139 the result, and the four 16-bit values from M2 into the upper four 8-bit
|
|
140 values of the result, all with signed saturation. */
|
|
141 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
142 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
|
|
143 {
|
|
144 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
|
|
145 }
|
|
146
|
|
147 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
148 _m_packsswb (__m64 __m1, __m64 __m2)
|
|
149 {
|
|
150 return _mm_packs_pi16 (__m1, __m2);
|
|
151 }
|
|
152
|
|
153 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
|
|
154 the result, and the two 32-bit values from M2 into the upper two 16-bit
|
|
155 values of the result, all with signed saturation. */
|
|
156 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
157 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
|
|
158 {
|
|
159 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
|
|
160 }
|
|
161
|
|
162 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
163 _m_packssdw (__m64 __m1, __m64 __m2)
|
|
164 {
|
|
165 return _mm_packs_pi32 (__m1, __m2);
|
|
166 }
|
|
167
|
|
168 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
|
169 the result, and the four 16-bit values from M2 into the upper four 8-bit
|
|
170 values of the result, all with unsigned saturation. */
|
|
171 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
172 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
|
|
173 {
|
|
174 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
|
|
175 }
|
|
176
|
|
177 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
178 _m_packuswb (__m64 __m1, __m64 __m2)
|
|
179 {
|
|
180 return _mm_packs_pu16 (__m1, __m2);
|
|
181 }
|
|
182
|
|
183 /* Interleave the four 8-bit values from the high half of M1 with the four
|
|
184 8-bit values from the high half of M2. */
|
|
185 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
186 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
|
|
187 {
|
|
188 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
|
|
189 }
|
|
190
|
|
191 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
192 _m_punpckhbw (__m64 __m1, __m64 __m2)
|
|
193 {
|
|
194 return _mm_unpackhi_pi8 (__m1, __m2);
|
|
195 }
|
|
196
|
|
197 /* Interleave the two 16-bit values from the high half of M1 with the two
|
|
198 16-bit values from the high half of M2. */
|
|
199 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
200 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
|
|
201 {
|
|
202 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
203 }
|
|
204
|
|
205 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
206 _m_punpckhwd (__m64 __m1, __m64 __m2)
|
|
207 {
|
|
208 return _mm_unpackhi_pi16 (__m1, __m2);
|
|
209 }
|
|
210
|
|
211 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
|
|
212 value from the high half of M2. */
|
|
213 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
214 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
|
|
215 {
|
|
216 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
|
|
217 }
|
|
218
|
|
219 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
220 _m_punpckhdq (__m64 __m1, __m64 __m2)
|
|
221 {
|
|
222 return _mm_unpackhi_pi32 (__m1, __m2);
|
|
223 }
|
|
224
|
|
225 /* Interleave the four 8-bit values from the low half of M1 with the four
|
|
226 8-bit values from the low half of M2. */
|
|
227 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
228 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
|
|
229 {
|
|
230 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
|
|
231 }
|
|
232
|
|
233 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
234 _m_punpcklbw (__m64 __m1, __m64 __m2)
|
|
235 {
|
|
236 return _mm_unpacklo_pi8 (__m1, __m2);
|
|
237 }
|
|
238
|
|
239 /* Interleave the two 16-bit values from the low half of M1 with the two
|
|
240 16-bit values from the low half of M2. */
|
|
241 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
242 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
|
|
243 {
|
|
244 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
245 }
|
|
246
|
|
247 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
248 _m_punpcklwd (__m64 __m1, __m64 __m2)
|
|
249 {
|
|
250 return _mm_unpacklo_pi16 (__m1, __m2);
|
|
251 }
|
|
252
|
|
253 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
|
|
254 value from the low half of M2. */
|
|
255 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
256 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
|
|
257 {
|
|
258 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
|
|
259 }
|
|
260
|
|
261 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
262 _m_punpckldq (__m64 __m1, __m64 __m2)
|
|
263 {
|
|
264 return _mm_unpacklo_pi32 (__m1, __m2);
|
|
265 }
|
|
266
|
|
267 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
|
|
268 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
269 _mm_add_pi8 (__m64 __m1, __m64 __m2)
|
|
270 {
|
|
271 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
|
|
272 }
|
|
273
|
|
274 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
275 _m_paddb (__m64 __m1, __m64 __m2)
|
|
276 {
|
|
277 return _mm_add_pi8 (__m1, __m2);
|
|
278 }
|
|
279
|
|
280 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
|
|
281 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
282 _mm_add_pi16 (__m64 __m1, __m64 __m2)
|
|
283 {
|
|
284 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
|
|
285 }
|
|
286
|
|
287 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
288 _m_paddw (__m64 __m1, __m64 __m2)
|
|
289 {
|
|
290 return _mm_add_pi16 (__m1, __m2);
|
|
291 }
|
|
292
|
|
293 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
|
|
294 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
295 _mm_add_pi32 (__m64 __m1, __m64 __m2)
|
|
296 {
|
|
297 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
|
|
298 }
|
|
299
|
|
300 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
301 _m_paddd (__m64 __m1, __m64 __m2)
|
|
302 {
|
|
303 return _mm_add_pi32 (__m1, __m2);
|
|
304 }
|
|
305
|
|
306 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
|
|
307 #ifdef __SSE2__
|
|
308 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
309 _mm_add_si64 (__m64 __m1, __m64 __m2)
|
|
310 {
|
|
311 return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
|
|
312 }
|
|
313 #endif
|
|
314
|
|
315 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
|
|
316 saturated arithmetic. */
|
|
317 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
318 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
|
|
319 {
|
|
320 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
|
|
321 }
|
|
322
|
|
323 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
324 _m_paddsb (__m64 __m1, __m64 __m2)
|
|
325 {
|
|
326 return _mm_adds_pi8 (__m1, __m2);
|
|
327 }
|
|
328
|
|
329 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
|
|
330 saturated arithmetic. */
|
|
331 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
332 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
|
|
333 {
|
|
334 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
|
|
335 }
|
|
336
|
|
337 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
338 _m_paddsw (__m64 __m1, __m64 __m2)
|
|
339 {
|
|
340 return _mm_adds_pi16 (__m1, __m2);
|
|
341 }
|
|
342
|
|
343 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
|
|
344 saturated arithmetic. */
|
|
345 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
346 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
|
|
347 {
|
|
348 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
|
|
349 }
|
|
350
|
|
351 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
352 _m_paddusb (__m64 __m1, __m64 __m2)
|
|
353 {
|
|
354 return _mm_adds_pu8 (__m1, __m2);
|
|
355 }
|
|
356
|
|
357 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
|
|
358 saturated arithmetic. */
|
|
359 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
360 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
|
|
361 {
|
|
362 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
|
|
363 }
|
|
364
|
|
365 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
366 _m_paddusw (__m64 __m1, __m64 __m2)
|
|
367 {
|
|
368 return _mm_adds_pu16 (__m1, __m2);
|
|
369 }
|
|
370
|
|
371 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
|
|
372 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
373 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
|
|
374 {
|
|
375 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
|
|
376 }
|
|
377
|
|
378 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
379 _m_psubb (__m64 __m1, __m64 __m2)
|
|
380 {
|
|
381 return _mm_sub_pi8 (__m1, __m2);
|
|
382 }
|
|
383
|
|
384 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
|
|
385 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
386 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
|
|
387 {
|
|
388 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
|
|
389 }
|
|
390
|
|
391 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
392 _m_psubw (__m64 __m1, __m64 __m2)
|
|
393 {
|
|
394 return _mm_sub_pi16 (__m1, __m2);
|
|
395 }
|
|
396
|
|
397 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
|
|
398 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
399 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
|
|
400 {
|
|
401 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
|
|
402 }
|
|
403
|
|
404 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
405 _m_psubd (__m64 __m1, __m64 __m2)
|
|
406 {
|
|
407 return _mm_sub_pi32 (__m1, __m2);
|
|
408 }
|
|
409
|
|
410 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
|
|
411 #ifdef __SSE2__
|
|
412 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
413 _mm_sub_si64 (__m64 __m1, __m64 __m2)
|
|
414 {
|
|
415 return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
|
|
416 }
|
|
417 #endif
|
|
418
|
|
419 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
|
|
420 saturating arithmetic. */
|
|
421 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
422 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
|
|
423 {
|
|
424 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
|
|
425 }
|
|
426
|
|
427 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
428 _m_psubsb (__m64 __m1, __m64 __m2)
|
|
429 {
|
|
430 return _mm_subs_pi8 (__m1, __m2);
|
|
431 }
|
|
432
|
|
433 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
|
434 signed saturating arithmetic. */
|
|
435 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
436 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
|
|
437 {
|
|
438 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
|
|
439 }
|
|
440
|
|
441 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
442 _m_psubsw (__m64 __m1, __m64 __m2)
|
|
443 {
|
|
444 return _mm_subs_pi16 (__m1, __m2);
|
|
445 }
|
|
446
|
|
447 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
|
|
448 unsigned saturating arithmetic. */
|
|
449 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
450 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
|
|
451 {
|
|
452 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
|
|
453 }
|
|
454
|
|
455 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
456 _m_psubusb (__m64 __m1, __m64 __m2)
|
|
457 {
|
|
458 return _mm_subs_pu8 (__m1, __m2);
|
|
459 }
|
|
460
|
|
461 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
|
462 unsigned saturating arithmetic. */
|
|
463 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
464 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
|
|
465 {
|
|
466 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
|
|
467 }
|
|
468
|
|
469 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
470 _m_psubusw (__m64 __m1, __m64 __m2)
|
|
471 {
|
|
472 return _mm_subs_pu16 (__m1, __m2);
|
|
473 }
|
|
474
|
|
475 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
|
|
476 four 32-bit intermediate results, which are then summed by pairs to
|
|
477 produce two 32-bit results. */
|
|
478 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
479 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
|
|
480 {
|
|
481 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
|
|
482 }
|
|
483
|
|
484 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
485 _m_pmaddwd (__m64 __m1, __m64 __m2)
|
|
486 {
|
|
487 return _mm_madd_pi16 (__m1, __m2);
|
|
488 }
|
|
489
|
|
490 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
|
|
491 M2 and produce the high 16 bits of the 32-bit results. */
|
|
492 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
493 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
|
|
494 {
|
|
495 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
|
|
496 }
|
|
497
|
|
498 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
499 _m_pmulhw (__m64 __m1, __m64 __m2)
|
|
500 {
|
|
501 return _mm_mulhi_pi16 (__m1, __m2);
|
|
502 }
|
|
503
|
|
504 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
|
|
505 the low 16 bits of the results. */
|
|
506 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
507 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
|
|
508 {
|
|
509 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
|
|
510 }
|
|
511
|
|
512 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
513 _m_pmullw (__m64 __m1, __m64 __m2)
|
|
514 {
|
|
515 return _mm_mullo_pi16 (__m1, __m2);
|
|
516 }
|
|
517
|
|
518 /* Shift four 16-bit values in M left by COUNT. */
|
|
519 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
520 _mm_sll_pi16 (__m64 __m, __m64 __count)
|
|
521 {
|
|
522 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
|
|
523 }
|
|
524
|
|
525 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
526 _m_psllw (__m64 __m, __m64 __count)
|
|
527 {
|
|
528 return _mm_sll_pi16 (__m, __count);
|
|
529 }
|
|
530
|
|
531 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
532 _mm_slli_pi16 (__m64 __m, int __count)
|
|
533 {
|
|
534 return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
|
|
535 }
|
|
536
|
|
537 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
538 _m_psllwi (__m64 __m, int __count)
|
|
539 {
|
|
540 return _mm_slli_pi16 (__m, __count);
|
|
541 }
|
|
542
|
|
543 /* Shift two 32-bit values in M left by COUNT. */
|
|
544 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
545 _mm_sll_pi32 (__m64 __m, __m64 __count)
|
|
546 {
|
|
547 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
|
|
548 }
|
|
549
|
|
550 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
551 _m_pslld (__m64 __m, __m64 __count)
|
|
552 {
|
|
553 return _mm_sll_pi32 (__m, __count);
|
|
554 }
|
|
555
|
|
556 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
557 _mm_slli_pi32 (__m64 __m, int __count)
|
|
558 {
|
|
559 return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
|
|
560 }
|
|
561
|
|
562 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
563 _m_pslldi (__m64 __m, int __count)
|
|
564 {
|
|
565 return _mm_slli_pi32 (__m, __count);
|
|
566 }
|
|
567
|
|
568 /* Shift the 64-bit value in M left by COUNT. */
|
|
569 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
570 _mm_sll_si64 (__m64 __m, __m64 __count)
|
|
571 {
|
|
572 return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
|
|
573 }
|
|
574
|
|
575 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
576 _m_psllq (__m64 __m, __m64 __count)
|
|
577 {
|
|
578 return _mm_sll_si64 (__m, __count);
|
|
579 }
|
|
580
|
|
581 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
582 _mm_slli_si64 (__m64 __m, int __count)
|
|
583 {
|
|
584 return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
|
|
585 }
|
|
586
|
|
587 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
588 _m_psllqi (__m64 __m, int __count)
|
|
589 {
|
|
590 return _mm_slli_si64 (__m, __count);
|
|
591 }
|
|
592
|
|
593 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
|
|
594 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
595 _mm_sra_pi16 (__m64 __m, __m64 __count)
|
|
596 {
|
|
597 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
|
|
598 }
|
|
599
|
|
600 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
601 _m_psraw (__m64 __m, __m64 __count)
|
|
602 {
|
|
603 return _mm_sra_pi16 (__m, __count);
|
|
604 }
|
|
605
|
|
606 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
607 _mm_srai_pi16 (__m64 __m, int __count)
|
|
608 {
|
|
609 return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
|
|
610 }
|
|
611
|
|
612 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
613 _m_psrawi (__m64 __m, int __count)
|
|
614 {
|
|
615 return _mm_srai_pi16 (__m, __count);
|
|
616 }
|
|
617
|
|
618 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
|
|
619 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
620 _mm_sra_pi32 (__m64 __m, __m64 __count)
|
|
621 {
|
|
622 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
|
|
623 }
|
|
624
|
|
625 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
626 _m_psrad (__m64 __m, __m64 __count)
|
|
627 {
|
|
628 return _mm_sra_pi32 (__m, __count);
|
|
629 }
|
|
630
|
|
631 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
632 _mm_srai_pi32 (__m64 __m, int __count)
|
|
633 {
|
|
634 return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
|
|
635 }
|
|
636
|
|
637 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
638 _m_psradi (__m64 __m, int __count)
|
|
639 {
|
|
640 return _mm_srai_pi32 (__m, __count);
|
|
641 }
|
|
642
|
|
643 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
|
|
644 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
645 _mm_srl_pi16 (__m64 __m, __m64 __count)
|
|
646 {
|
|
647 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
|
|
648 }
|
|
649
|
|
650 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
651 _m_psrlw (__m64 __m, __m64 __count)
|
|
652 {
|
|
653 return _mm_srl_pi16 (__m, __count);
|
|
654 }
|
|
655
|
|
656 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
657 _mm_srli_pi16 (__m64 __m, int __count)
|
|
658 {
|
|
659 return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
|
|
660 }
|
|
661
|
|
662 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
663 _m_psrlwi (__m64 __m, int __count)
|
|
664 {
|
|
665 return _mm_srli_pi16 (__m, __count);
|
|
666 }
|
|
667
|
|
668 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
|
|
669 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
670 _mm_srl_pi32 (__m64 __m, __m64 __count)
|
|
671 {
|
|
672 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
|
|
673 }
|
|
674
|
|
675 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
676 _m_psrld (__m64 __m, __m64 __count)
|
|
677 {
|
|
678 return _mm_srl_pi32 (__m, __count);
|
|
679 }
|
|
680
|
|
681 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
682 _mm_srli_pi32 (__m64 __m, int __count)
|
|
683 {
|
|
684 return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
|
|
685 }
|
|
686
|
|
687 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
688 _m_psrldi (__m64 __m, int __count)
|
|
689 {
|
|
690 return _mm_srli_pi32 (__m, __count);
|
|
691 }
|
|
692
|
|
693 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
|
|
694 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
695 _mm_srl_si64 (__m64 __m, __m64 __count)
|
|
696 {
|
|
697 return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
|
|
698 }
|
|
699
|
|
700 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
701 _m_psrlq (__m64 __m, __m64 __count)
|
|
702 {
|
|
703 return _mm_srl_si64 (__m, __count);
|
|
704 }
|
|
705
|
|
706 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
707 _mm_srli_si64 (__m64 __m, int __count)
|
|
708 {
|
|
709 return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
|
|
710 }
|
|
711
|
|
712 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
713 _m_psrlqi (__m64 __m, int __count)
|
|
714 {
|
|
715 return _mm_srli_si64 (__m, __count);
|
|
716 }
|
|
717
|
|
718 /* Bit-wise AND the 64-bit values in M1 and M2. */
|
|
719 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
720 _mm_and_si64 (__m64 __m1, __m64 __m2)
|
|
721 {
|
|
722 return __builtin_ia32_pand (__m1, __m2);
|
|
723 }
|
|
724
|
|
725 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
726 _m_pand (__m64 __m1, __m64 __m2)
|
|
727 {
|
|
728 return _mm_and_si64 (__m1, __m2);
|
|
729 }
|
|
730
|
|
731 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
|
|
732 64-bit value in M2. */
|
|
733 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
734 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
|
|
735 {
|
|
736 return __builtin_ia32_pandn (__m1, __m2);
|
|
737 }
|
|
738
|
|
739 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
740 _m_pandn (__m64 __m1, __m64 __m2)
|
|
741 {
|
|
742 return _mm_andnot_si64 (__m1, __m2);
|
|
743 }
|
|
744
|
|
745 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
|
|
746 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
747 _mm_or_si64 (__m64 __m1, __m64 __m2)
|
|
748 {
|
|
749 return __builtin_ia32_por (__m1, __m2);
|
|
750 }
|
|
751
|
|
752 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
753 _m_por (__m64 __m1, __m64 __m2)
|
|
754 {
|
|
755 return _mm_or_si64 (__m1, __m2);
|
|
756 }
|
|
757
|
|
758 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
|
|
759 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
760 _mm_xor_si64 (__m64 __m1, __m64 __m2)
|
|
761 {
|
|
762 return __builtin_ia32_pxor (__m1, __m2);
|
|
763 }
|
|
764
|
|
765 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
766 _m_pxor (__m64 __m1, __m64 __m2)
|
|
767 {
|
|
768 return _mm_xor_si64 (__m1, __m2);
|
|
769 }
|
|
770
|
|
771 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
|
|
772 test is true and zero if false. */
|
|
773 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
774 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
|
|
775 {
|
|
776 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
|
|
777 }
|
|
778
|
|
779 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
780 _m_pcmpeqb (__m64 __m1, __m64 __m2)
|
|
781 {
|
|
782 return _mm_cmpeq_pi8 (__m1, __m2);
|
|
783 }
|
|
784
|
|
785 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
786 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
|
|
787 {
|
|
788 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
|
|
789 }
|
|
790
|
|
791 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
792 _m_pcmpgtb (__m64 __m1, __m64 __m2)
|
|
793 {
|
|
794 return _mm_cmpgt_pi8 (__m1, __m2);
|
|
795 }
|
|
796
|
|
/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
   the test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
  /* Reinterpret both operands as vectors of four words for PCMPEQW.  */
  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}
|
|
804
|
|
805 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
806 _m_pcmpeqw (__m64 __m1, __m64 __m2)
|
|
807 {
|
|
808 return _mm_cmpeq_pi16 (__m1, __m2);
|
|
809 }
|
|
810
|
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
  /* Word-wise greater-than via PCMPGTW (signed compare per its ISA
     definition): 0xFFFF where M1 > M2, zero elsewhere.  */
  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}
|
|
816
|
|
817 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
818 _m_pcmpgtw (__m64 __m1, __m64 __m2)
|
|
819 {
|
|
820 return _mm_cmpgt_pi16 (__m1, __m2);
|
|
821 }
|
|
822
|
|
/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
   the test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
  /* Reinterpret both operands as vectors of two dwords for PCMPEQD.  */
  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}
|
|
830
|
|
831 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
832 _m_pcmpeqd (__m64 __m1, __m64 __m2)
|
|
833 {
|
|
834 return _mm_cmpeq_pi32 (__m1, __m2);
|
|
835 }
|
|
836
|
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
  /* Dword-wise greater-than via PCMPGTD (signed compare per its ISA
     definition): 0xFFFFFFFF where M1 > M2, zero elsewhere.  */
  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}
|
|
842
|
|
843 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
844 _m_pcmpgtd (__m64 __m1, __m64 __m2)
|
|
845 {
|
|
846 return _mm_cmpgt_pi32 (__m1, __m2);
|
|
847 }
|
|
848
|
|
/* Creates a 64-bit zero.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
  /* Casting the 64-bit scalar constant 0 to __m64 yields an all-zero
     vector (GCC allows same-size scalar-to-vector casts).  */
  return (__m64)0LL;
}
|
|
855
|
|
/* Creates a vector of two 32-bit values; I0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
  /* The builtin takes elements lowest-first, so arguments are swapped
     relative to the Intel-style high-to-low parameter order.  */
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}
|
|
862
|
|
/* Creates a vector of four 16-bit values; W0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
  /* The builtin takes elements lowest-first, so arguments are reversed
     relative to the Intel-style high-to-low parameter order.  */
  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}
|
|
869
|
|
/* Creates a vector of eight 8-bit values; B0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
	     char __b3, char __b2, char __b1, char __b0)
{
  /* The builtin takes elements lowest-first, so arguments are reversed
     relative to the Intel-style high-to-low parameter order.  */
  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
					       __b4, __b5, __b6, __b7);
}
|
|
878
|
|
879 /* Similar, but with the arguments in reverse order. */
|
|
880 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
881 _mm_setr_pi32 (int __i0, int __i1)
|
|
882 {
|
|
883 return _mm_set_pi32 (__i1, __i0);
|
|
884 }
|
|
885
|
|
886 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
887 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
|
|
888 {
|
|
889 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
|
|
890 }
|
|
891
|
|
892 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
893 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
|
|
894 char __b4, char __b5, char __b6, char __b7)
|
|
895 {
|
|
896 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
|
897 }
|
|
898
|
|
899 /* Creates a vector of two 32-bit values, both elements containing I. */
|
|
900 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
901 _mm_set1_pi32 (int __i)
|
|
902 {
|
|
903 return _mm_set_pi32 (__i, __i);
|
|
904 }
|
|
905
|
|
906 /* Creates a vector of four 16-bit values, all elements containing W. */
|
|
907 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
908 _mm_set1_pi16 (short __w)
|
|
909 {
|
|
910 return _mm_set_pi16 (__w, __w, __w, __w);
|
|
911 }
|
|
912
|
|
913 /* Creates a vector of eight 8-bit values, all elements containing B. */
|
|
914 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
915 _mm_set1_pi8 (char __b)
|
|
916 {
|
|
917 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
|
|
918 }
|
|
919
|
|
920 #endif /* __MMX__ */
|
|
921 #endif /* _MMINTRIN_H_INCLUDED */
|