Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/avx512erintrin.h @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc. | |
2 | |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify | |
6 it under the terms of the GNU General Public License as published by | |
7 the Free Software Foundation; either version 3, or (at your option) | |
8 any later version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU General Public License for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
/* This header must be reached through <immintrin.h> so that the shared
   vector types and _MM_FROUND_* constants are already set up.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED

/* If AVX512ER is not enabled on the command line, enable it just for the
   intrinsics in this file; __DISABLE_AVX512ER__ records that the target
   options must be popped again at the end of the header.  */
#ifndef __AVX512ER__
#pragma GCC push_options
#pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */
36 | |
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write masks: one bit per vector element (8 doubles / 16 floats).  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* With optimization the intrinsics can be real inline functions whose
   rounding-mode argument folds to a constant; without it they are
   provided as macros in the #else branch below.  */
#ifdef __OPTIMIZE__
50 extern __inline __m512d | |
51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
52 _mm512_exp2a23_round_pd (__m512d __A, int __R) | |
53 { | |
54 __m512d __W; | |
55 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, | |
56 (__v8df) __W, | |
57 (__mmask8) -1, __R); | |
58 } | |
59 | |
/* 2^x with 23-bit accuracy per double in __A under rounding mode __R;
   result elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
                                               (__v8df) __W,
                                               (__mmask8) __U, __R);
}
68 | |
/* 2^x with 23-bit accuracy per double in __A under rounding mode __R;
   result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
                                               (__v8df) _mm512_setzero_pd (),
                                               (__mmask8) __U, __R);
}
77 | |
78 extern __inline __m512 | |
79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
80 _mm512_exp2a23_round_ps (__m512 __A, int __R) | |
81 { | |
82 __m512 __W; | |
83 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, | |
84 (__v16sf) __W, | |
85 (__mmask16) -1, __R); | |
86 } | |
87 | |
/* 2^x with 23-bit accuracy per float in __A under rounding mode __R;
   result elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
                                              (__v16sf) __W,
                                              (__mmask16) __U, __R);
}
96 | |
/* 2^x with 23-bit accuracy per float in __A under rounding mode __R;
   result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
                                              (__v16sf) _mm512_setzero_ps (),
                                              (__mmask16) __U, __R);
}
105 | |
106 extern __inline __m512d | |
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
108 _mm512_rcp28_round_pd (__m512d __A, int __R) | |
109 { | |
110 __m512d __W; | |
111 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, | |
112 (__v8df) __W, | |
113 (__mmask8) -1, __R); | |
114 } | |
115 | |
/* Approximate reciprocal (error < 2^-28) per double in __A under
   rounding mode __R; elements whose bit in __U is clear come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
                                                (__v8df) __W,
                                                (__mmask8) __U, __R);
}
124 | |
/* Approximate reciprocal (error < 2^-28) per double in __A under
   rounding mode __R; elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) __U, __R);
}
133 | |
134 extern __inline __m512 | |
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
136 _mm512_rcp28_round_ps (__m512 __A, int __R) | |
137 { | |
138 __m512 __W; | |
139 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, | |
140 (__v16sf) __W, | |
141 (__mmask16) -1, __R); | |
142 } | |
143 | |
/* Approximate reciprocal (error < 2^-28) per float in __A under
   rounding mode __R; elements whose bit in __U is clear come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
                                               (__v16sf) __W,
                                               (__mmask16) __U, __R);
}
152 | |
/* Approximate reciprocal (error < 2^-28) per float in __A under
   rounding mode __R; elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) __U, __R);
}
161 | |
/* Approximate reciprocal (error < 2^-28) of the low double of __B under
   rounding mode __R; the upper element is copied from __A.  Note the
   builtin takes its operands as (__B, __A): the first operand supplies
   the low element being computed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
                                                 (__v2df) __A,
                                                 __R);
}
170 | |
/* Approximate reciprocal (error < 2^-28) of the low float of __B under
   rounding mode __R; the upper three elements are copied from __A.
   Note the builtin takes its operands as (__B, __A).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
                                                (__v4sf) __A,
                                                __R);
}
179 | |
180 extern __inline __m512d | |
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
182 _mm512_rsqrt28_round_pd (__m512d __A, int __R) | |
183 { | |
184 __m512d __W; | |
185 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, | |
186 (__v8df) __W, | |
187 (__mmask8) -1, __R); | |
188 } | |
189 | |
/* Approximate reciprocal square root (error < 2^-28) per double in __A
   under rounding mode __R; elements whose bit in __U is clear come
   from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) __U, __R);
}
198 | |
/* Approximate reciprocal square root (error < 2^-28) per double in __A
   under rounding mode __R; elements whose bit in __U is clear are
   zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
                                                  (__v8df) _mm512_setzero_pd (),
                                                  (__mmask8) __U, __R);
}
207 | |
208 extern __inline __m512 | |
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
210 _mm512_rsqrt28_round_ps (__m512 __A, int __R) | |
211 { | |
212 __m512 __W; | |
213 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, | |
214 (__v16sf) __W, | |
215 (__mmask16) -1, __R); | |
216 } | |
217 | |
/* Approximate reciprocal square root (error < 2^-28) per float in __A
   under rounding mode __R; elements whose bit in __U is clear come
   from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) __U, __R);
}
226 | |
/* Approximate reciprocal square root (error < 2^-28) per float in __A
   under rounding mode __R; elements whose bit in __U is clear are
   zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
                                                 (__v16sf) _mm512_setzero_ps (),
                                                 (__mmask16) __U, __R);
}
235 | |
/* Approximate reciprocal square root (error < 2^-28) of the low double
   of __B under rounding mode __R; the upper element is copied from __A.
   Note the builtin takes its operands as (__B, __A).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
                                                   (__v2df) __A,
                                                   __R);
}
244 | |
/* Approximate reciprocal square root (error < 2^-28) of the low float
   of __B under rounding mode __R; the upper three elements are copied
   from __A.  Note the builtin takes its operands as (__B, __A).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
                                                  (__v4sf) __A,
                                                  __R);
}
253 | |
#else
/* Without optimization the inline functions above cannot guarantee that
   the rounding-mode argument folds to the compile-time constant the
   builtins require, so the same intrinsics are provided as macros that
   expand directly to the builtins.  NOTE(review): macro arguments are
   deliberately left unparenthesized here to mirror the inline-function
   call forms — confirm against callers before "fixing".  */
#define _mm512_exp2a23_round_pd(A, C)            \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
  __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
  __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)              \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C)   \
  __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)     \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)              \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C)   \
  __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)     \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
  __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
  __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

/* Scalar forms: here the macro arguments are already in the builtin's
   operand order, unlike the inline functions above which swap A/B.  */
#define _mm_rcp28_round_sd(A, B, R)              \
  __builtin_ia32_rcp28sd_round(A, B, R)

#define _mm_rcp28_round_ss(A, B, R)              \
  __builtin_ia32_rcp28ss_round(A, B, R)

#define _mm_rsqrt28_round_sd(A, B, R)            \
  __builtin_ia32_rsqrt28sd_round(A, B, R)

#define _mm_rsqrt28_round_ss(A, B, R)            \
  __builtin_ia32_rsqrt28ss_round(A, B, R)

#endif
322 | |
/* Convenience forms of the intrinsics above that use the current
   rounding mode (_MM_FROUND_CUR_DIRECTION) instead of taking an
   explicit rounding argument.  These are plain macros in both the
   __OPTIMIZE__ and non-__OPTIMIZE__ configurations.  */
#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
  _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
  _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
  _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
  _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
  _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
  _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
  _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
  _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
  _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
  _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
  _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
  _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar forms: these expand straight to the builtins with the operands
   swapped to (B, A), matching the inline _mm_*_round_* functions where
   the second argument supplies the low element.  */
#define _mm_rcp28_sd(A, B) \
  __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
  __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
  __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
  __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
388 | |
/* Restore the caller's target options if we enabled AVX512ER above.  */
#ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512ER__ */

#endif /* _AVX512ERINTRIN_H_INCLUDED */