comparison gcc/config/i386/avx512erintrin.h @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512ERINTRIN_H_INCLUDED
29 #define _AVX512ERINTRIN_H_INCLUDED
30
31 #ifndef __AVX512ER__
32 #pragma GCC push_options
33 #pragma GCC target("avx512er")
34 #define __DISABLE_AVX512ER__
35 #endif /* __AVX512ER__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40
41 /* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
43 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
44 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
45
46 typedef unsigned char __mmask8;
47 typedef unsigned short __mmask16;
48
49 #ifdef __OPTIMIZE__
50 extern __inline __m512d
51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
52 _mm512_exp2a23_round_pd (__m512d __A, int __R)
53 {
54 __m512d __W;
55 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
56 (__v8df) __W,
57 (__mmask8) -1, __R);
58 }
59
60 extern __inline __m512d
61 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
62 _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
63 {
64 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
65 (__v8df) __W,
66 (__mmask8) __U, __R);
67 }
68
69 extern __inline __m512d
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
72 {
73 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
74 (__v8df) _mm512_setzero_pd (),
75 (__mmask8) __U, __R);
76 }
77
78 extern __inline __m512
79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80 _mm512_exp2a23_round_ps (__m512 __A, int __R)
81 {
82 __m512 __W;
83 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
84 (__v16sf) __W,
85 (__mmask16) -1, __R);
86 }
87
88 extern __inline __m512
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
91 {
92 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
93 (__v16sf) __W,
94 (__mmask16) __U, __R);
95 }
96
97 extern __inline __m512
98 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99 _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
100 {
101 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
102 (__v16sf) _mm512_setzero_ps (),
103 (__mmask16) __U, __R);
104 }
105
106 extern __inline __m512d
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108 _mm512_rcp28_round_pd (__m512d __A, int __R)
109 {
110 __m512d __W;
111 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
112 (__v8df) __W,
113 (__mmask8) -1, __R);
114 }
115
116 extern __inline __m512d
117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118 _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
119 {
120 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
121 (__v8df) __W,
122 (__mmask8) __U, __R);
123 }
124
125 extern __inline __m512d
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
128 {
129 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
130 (__v8df) _mm512_setzero_pd (),
131 (__mmask8) __U, __R);
132 }
133
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_rcp28_round_ps (__m512 __A, int __R)
137 {
138 __m512 __W;
139 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
140 (__v16sf) __W,
141 (__mmask16) -1, __R);
142 }
143
144 extern __inline __m512
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
147 {
148 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
149 (__v16sf) __W,
150 (__mmask16) __U, __R);
151 }
152
153 extern __inline __m512
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
156 {
157 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
158 (__v16sf) _mm512_setzero_ps (),
159 (__mmask16) __U, __R);
160 }
161
162 extern __inline __m128d
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
165 {
166 return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
167 (__v2df) __A,
168 __R);
169 }
170
171 extern __inline __m128
172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
174 {
175 return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
176 (__v4sf) __A,
177 __R);
178 }
179
180 extern __inline __m512d
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
183 {
184 __m512d __W;
185 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
186 (__v8df) __W,
187 (__mmask8) -1, __R);
188 }
189
190 extern __inline __m512d
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
193 {
194 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
195 (__v8df) __W,
196 (__mmask8) __U, __R);
197 }
198
199 extern __inline __m512d
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
202 {
203 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
204 (__v8df) _mm512_setzero_pd (),
205 (__mmask8) __U, __R);
206 }
207
208 extern __inline __m512
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm512_rsqrt28_round_ps (__m512 __A, int __R)
211 {
212 __m512 __W;
213 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
214 (__v16sf) __W,
215 (__mmask16) -1, __R);
216 }
217
218 extern __inline __m512
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
221 {
222 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
223 (__v16sf) __W,
224 (__mmask16) __U, __R);
225 }
226
227 extern __inline __m512
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
230 {
231 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
232 (__v16sf) _mm512_setzero_ps (),
233 (__mmask16) __U, __R);
234 }
235
236 extern __inline __m128d
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
239 {
240 return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
241 (__v2df) __A,
242 __R);
243 }
244
245 extern __inline __m128
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
248 {
249 return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
250 (__v4sf) __A,
251 __R);
252 }
253
254 #else
255 #define _mm512_exp2a23_round_pd(A, C) \
256 __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
257
258 #define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
259 __builtin_ia32_exp2pd_mask(A, W, U, C)
260
261 #define _mm512_maskz_exp2a23_round_pd(U, A, C) \
262 __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
263
264 #define _mm512_exp2a23_round_ps(A, C) \
265 __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
266
267 #define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
268 __builtin_ia32_exp2ps_mask(A, W, U, C)
269
270 #define _mm512_maskz_exp2a23_round_ps(U, A, C) \
271 __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
272
273 #define _mm512_rcp28_round_pd(A, C) \
274 __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
275
276 #define _mm512_mask_rcp28_round_pd(W, U, A, C) \
277 __builtin_ia32_rcp28pd_mask(A, W, U, C)
278
279 #define _mm512_maskz_rcp28_round_pd(U, A, C) \
280 __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
281
282 #define _mm512_rcp28_round_ps(A, C) \
283 __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
284
285 #define _mm512_mask_rcp28_round_ps(W, U, A, C) \
286 __builtin_ia32_rcp28ps_mask(A, W, U, C)
287
288 #define _mm512_maskz_rcp28_round_ps(U, A, C) \
289 __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
290
291 #define _mm512_rsqrt28_round_pd(A, C) \
292 __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
293
294 #define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
295 __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
296
297 #define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
298 __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
299
300 #define _mm512_rsqrt28_round_ps(A, C) \
301 __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
302
303 #define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
304 __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
305
306 #define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
307 __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
308
309 #define _mm_rcp28_round_sd(A, B, R) \
310 __builtin_ia32_rcp28sd_round(A, B, R)
311
312 #define _mm_rcp28_round_ss(A, B, R) \
313 __builtin_ia32_rcp28ss_round(A, B, R)
314
315 #define _mm_rsqrt28_round_sd(A, B, R) \
316 __builtin_ia32_rsqrt28sd_round(A, B, R)
317
318 #define _mm_rsqrt28_round_ss(A, B, R) \
319 __builtin_ia32_rsqrt28ss_round(A, B, R)
320
321 #endif
322
/* Convenience forms that use the current rounding direction
   (_MM_FROUND_CUR_DIRECTION) instead of taking an explicit
   rounding-mode argument.  */
#define _mm512_exp2a23_pd(A) \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar wrappers expand the builtin directly with the operands
   swapped to (B, A), matching the inline-function definitions.  */
#define _mm_rcp28_sd(A, B) \
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
388
389 #ifdef __DISABLE_AVX512ER__
390 #undef __DISABLE_AVX512ER__
391 #pragma GCC pop_options
392 #endif /* __DISABLE_AVX512ER__ */
393
394 #endif /* _AVX512ERINTRIN_H_INCLUDED */