annotate gcc/config/rs6000/pmmintrin.h @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
1 /* Copyright (C) 2003-2020 Free Software Foundation, Inc.
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
2
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
3 This file is part of GCC.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
4
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
5 GCC is free software; you can redistribute it and/or modify
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
6 it under the terms of the GNU General Public License as published by
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
7 the Free Software Foundation; either version 3, or (at your option)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
8 any later version.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
9
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
10 GCC is distributed in the hope that it will be useful,
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
13 GNU General Public License for more details.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
14
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
15 Under Section 7 of GPL version 3, you are granted additional
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
16 permissions described in the GCC Runtime Library Exception, version
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
17 3.1, as published by the Free Software Foundation.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
18
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
19 You should have received a copy of the GNU General Public License and
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
20 a copy of the GCC Runtime Library Exception along with this program;
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
22 <http://www.gnu.org/licenses/>. */
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
23
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
24 /* Implemented from the specification included in the Intel C++ Compiler
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
25 User Guide and Reference, version 9.0. */
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
26
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
27 #ifndef NO_WARN_X86_INTRINSICS
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
28 /* This header is distributed to simplify porting x86_64 code that
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
29 makes explicit use of Intel intrinsics to powerpc64le.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
30 It is the user's responsibility to determine if the results are
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
31 acceptable and make additional changes as necessary.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
32 Note that much code that uses Intel intrinsics can be rewritten in
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
33 standard C or GNU C extensions, which are more portable and better
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
34 optimized across multiple targets.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
35
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
36 In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
37 is a good match for most SIMD operations. However the Horizontal
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
38 add/sub requires the data pairs be permuted into a separate
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
39 registers with vertical even/odd alignment for the operation.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
40 And the addsub operation requires the sign of only the even numbered
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
41 elements be flipped (xored with -0.0).
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
42 For larger blocks of code using these intrinsic implementations,
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
43 the compiler be should be able to schedule instructions to avoid
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
44 additional latency.
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
45
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
46 In the specific case of the monitor and mwait instructions there are
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
47 no direct equivalent in the PowerISA at this time. So those
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
48 intrinsics are not implemented. */
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
49 #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
50 #endif
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
51
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
52 #ifndef _PMMINTRIN_H_INCLUDED
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
53 #define _PMMINTRIN_H_INCLUDED
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
54
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
55 /* We need definitions from the SSE2 and SSE header files.  */
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
56 #include <emmintrin.h>
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
57
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
58 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
59 _mm_addsub_ps (__m128 __X, __m128 __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
60 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
61 const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0};
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
62 __v4sf even_neg_Y = vec_xor(__Y, even_n0);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
63 return (__m128) vec_add (__X, even_neg_Y);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
64 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
65
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
66 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
67 _mm_addsub_pd (__m128d __X, __m128d __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
68 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
69 const __v2df even_n0 = {-0.0, 0.0};
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
70 __v2df even_neg_Y = vec_xor(__Y, even_n0);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
71 return (__m128d) vec_add (__X, even_neg_Y);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
72 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
73
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
74 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
75 _mm_hadd_ps (__m128 __X, __m128 __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
76 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
77 __vector unsigned char xform2 = {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
78 0x00, 0x01, 0x02, 0x03,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
79 0x08, 0x09, 0x0A, 0x0B,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
80 0x10, 0x11, 0x12, 0x13,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
81 0x18, 0x19, 0x1A, 0x1B
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
82 };
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
83 __vector unsigned char xform1 = {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
84 0x04, 0x05, 0x06, 0x07,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
85 0x0C, 0x0D, 0x0E, 0x0F,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
86 0x14, 0x15, 0x16, 0x17,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
87 0x1C, 0x1D, 0x1E, 0x1F
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
88 };
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
89 return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
90 vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
91 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
92
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
93 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
94 _mm_hsub_ps (__m128 __X, __m128 __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
95 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
96 __vector unsigned char xform2 = {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
97 0x00, 0x01, 0x02, 0x03,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
98 0x08, 0x09, 0x0A, 0x0B,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
99 0x10, 0x11, 0x12, 0x13,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
100 0x18, 0x19, 0x1A, 0x1B
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
101 };
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
102 __vector unsigned char xform1 = {
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
103 0x04, 0x05, 0x06, 0x07,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
104 0x0C, 0x0D, 0x0E, 0x0F,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
105 0x14, 0x15, 0x16, 0x17,
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
106 0x1C, 0x1D, 0x1E, 0x1F
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
107 };
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
108 return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
109 vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
110 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
111
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
112 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
113 _mm_hadd_pd (__m128d __X, __m128d __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
114 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
115 return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y),
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
116 vec_mergel ((__v2df) __X, (__v2df)__Y));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
117 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
118
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
119 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
120 _mm_hsub_pd (__m128d __X, __m128d __Y)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
121 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
122 return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y),
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
123 vec_mergel ((__v2df) __X, (__v2df)__Y));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
124 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
125
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
126 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
127 _mm_movehdup_ps (__m128 __X)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
128 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
129 return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
130 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
131
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
132 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
133 _mm_moveldup_ps (__m128 __X)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
134 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
135 return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
136 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
137
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
138 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
139 _mm_loaddup_pd (double const *__P)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
140 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
141 return (__m128d) vec_splats (*__P);
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
142 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
143
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
144 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
145 _mm_movedup_pd (__m128d __X)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
146 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
147 return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
148 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
149
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
150 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
151 _mm_lddqu_si128 (__m128i const *__P)
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
152 {
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
153 return (__m128i) (vec_vsx_ld(0, (signed int const *)__P));
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
154 }
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
155
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
156 /* POWER8 / POWER9 have no equivalent for either _mm_monitor or _mm_mwait.  */
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
157
84e7813d76e9 gcc-8.2
mir3636
parents:
diff changeset
158 #endif /* _PMMINTRIN_H_INCLUDED */