0
|
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify
|
|
6 it under the terms of the GNU General Public License as published by
|
|
7 the Free Software Foundation; either version 3, or (at your option)
|
|
8 any later version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful,
|
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 GNU General Public License for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
24 /* Implemented from the specification included in the Intel C++ Compiler
|
|
25 User Guide and Reference, version 9.1. */
|
|
26
|
|
27 #ifndef _TMMINTRIN_H_INCLUDED
|
|
28 #define _TMMINTRIN_H_INCLUDED
|
|
29
|
|
30 #ifndef __SSSE3__
|
|
31 # error "SSSE3 instruction set not enabled"
|
|
32 #else
|
|
33
|
|
/* We need definitions from the SSE3, SSE2 and SSE header files.  */
|
|
35 #include <pmmintrin.h>
|
|
36
|
|
37 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
38 _mm_hadd_epi16 (__m128i __X, __m128i __Y)
|
|
39 {
|
|
40 return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
41 }
|
|
42
|
|
43 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
44 _mm_hadd_epi32 (__m128i __X, __m128i __Y)
|
|
45 {
|
|
46 return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
|
|
47 }
|
|
48
|
|
49 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
50 _mm_hadds_epi16 (__m128i __X, __m128i __Y)
|
|
51 {
|
|
52 return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
53 }
|
|
54
|
|
55 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
56 _mm_hadd_pi16 (__m64 __X, __m64 __Y)
|
|
57 {
|
|
58 return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
|
|
59 }
|
|
60
|
|
61 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
62 _mm_hadd_pi32 (__m64 __X, __m64 __Y)
|
|
63 {
|
|
64 return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
|
|
65 }
|
|
66
|
|
67 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
68 _mm_hadds_pi16 (__m64 __X, __m64 __Y)
|
|
69 {
|
|
70 return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
|
|
71 }
|
|
72
|
|
73 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
74 _mm_hsub_epi16 (__m128i __X, __m128i __Y)
|
|
75 {
|
|
76 return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
77 }
|
|
78
|
|
79 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
80 _mm_hsub_epi32 (__m128i __X, __m128i __Y)
|
|
81 {
|
|
82 return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
|
|
83 }
|
|
84
|
|
85 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
86 _mm_hsubs_epi16 (__m128i __X, __m128i __Y)
|
|
87 {
|
|
88 return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
89 }
|
|
90
|
|
91 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
92 _mm_hsub_pi16 (__m64 __X, __m64 __Y)
|
|
93 {
|
|
94 return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
|
|
95 }
|
|
96
|
|
97 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
98 _mm_hsub_pi32 (__m64 __X, __m64 __Y)
|
|
99 {
|
|
100 return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
|
|
101 }
|
|
102
|
|
103 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
104 _mm_hsubs_pi16 (__m64 __X, __m64 __Y)
|
|
105 {
|
|
106 return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
|
|
107 }
|
|
108
|
|
109 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
110 _mm_maddubs_epi16 (__m128i __X, __m128i __Y)
|
|
111 {
|
|
112 return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
|
|
113 }
|
|
114
|
|
115 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
116 _mm_maddubs_pi16 (__m64 __X, __m64 __Y)
|
|
117 {
|
|
118 return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
|
|
119 }
|
|
120
|
|
121 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
122 _mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
|
|
123 {
|
|
124 return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
125 }
|
|
126
|
|
127 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
128 _mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
|
|
129 {
|
|
130 return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
|
|
131 }
|
|
132
|
|
133 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
134 _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
|
|
135 {
|
|
136 return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
|
|
137 }
|
|
138
|
|
139 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
140 _mm_shuffle_pi8 (__m64 __X, __m64 __Y)
|
|
141 {
|
|
142 return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
|
|
143 }
|
|
144
|
|
145 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
146 _mm_sign_epi8 (__m128i __X, __m128i __Y)
|
|
147 {
|
|
148 return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
|
|
149 }
|
|
150
|
|
151 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
152 _mm_sign_epi16 (__m128i __X, __m128i __Y)
|
|
153 {
|
|
154 return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
|
|
155 }
|
|
156
|
|
157 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
158 _mm_sign_epi32 (__m128i __X, __m128i __Y)
|
|
159 {
|
|
160 return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
|
|
161 }
|
|
162
|
|
163 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
164 _mm_sign_pi8 (__m64 __X, __m64 __Y)
|
|
165 {
|
|
166 return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
|
|
167 }
|
|
168
|
|
169 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
170 _mm_sign_pi16 (__m64 __X, __m64 __Y)
|
|
171 {
|
|
172 return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
|
|
173 }
|
|
174
|
|
175 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
176 _mm_sign_pi32 (__m64 __X, __m64 __Y)
|
|
177 {
|
|
178 return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
|
|
179 }
|
|
180
|
|
181 #ifdef __OPTIMIZE__
|
|
182 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
183 _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
|
|
184 {
|
|
185 return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
|
|
186 (__v2di)__Y, __N * 8);
|
|
187 }
|
|
188
|
|
189 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
190 _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
|
|
191 {
|
|
192 return (__m64) __builtin_ia32_palignr ((__v1di)__X,
|
|
193 (__v1di)__Y, __N * 8);
|
|
194 }
|
|
195 #else
|
|
/* Macro form, used when __OPTIMIZE__ is not defined.  A and B are cast
   to __m128i and then to __v2di; COUNT is cast to int and multiplied by
   8 before being passed as the last argument of the palignr128
   builtin.  */
#define _mm_alignr_epi8(A, B, COUNT)					\
  ((__m128i) __builtin_ia32_palignr128 ((__v2di) (__m128i) (A),	\
					(__v2di) (__m128i) (B),	\
					(int) (COUNT) * 8))
|
|
/* Macro form, used when __OPTIMIZE__ is not defined.  A and B are cast
   to __m64 and then to __v1di; COUNT is cast to int and multiplied by 8
   before being passed as the last argument of the palignr builtin.  */
#define _mm_alignr_pi8(A, B, COUNT)					\
  ((__m64) __builtin_ia32_palignr ((__v1di) (__m64) (A),		\
				   (__v1di) (__m64) (B),		\
				   (int) (COUNT) * 8))
|
|
204 #endif
|
|
205
|
|
206 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
207 _mm_abs_epi8 (__m128i __X)
|
|
208 {
|
|
209 return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
|
|
210 }
|
|
211
|
|
212 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
213 _mm_abs_epi16 (__m128i __X)
|
|
214 {
|
|
215 return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
|
|
216 }
|
|
217
|
|
218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
219 _mm_abs_epi32 (__m128i __X)
|
|
220 {
|
|
221 return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
|
|
222 }
|
|
223
|
|
224 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
225 _mm_abs_pi8 (__m64 __X)
|
|
226 {
|
|
227 return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
|
|
228 }
|
|
229
|
|
230 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
231 _mm_abs_pi16 (__m64 __X)
|
|
232 {
|
|
233 return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
|
|
234 }
|
|
235
|
|
236 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
237 _mm_abs_pi32 (__m64 __X)
|
|
238 {
|
|
239 return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
|
|
240 }
|
|
241
|
|
242 #endif /* __SSSE3__ */
|
|
243
|
|
244 #endif /* _TMMINTRIN_H_INCLUDED */
|