gcc/config/rs6000/pmmintrin.h @ 131:84e7813d76e9 (gcc-8.2, CbC_gcc repository)

author:     mir3636
date:       Thu, 25 Oct 2018 07:37:49 +0900
children:   1830386684a0
comparison: 111:04ced10e8804 vs. 131:84e7813d76e9
/* Copyright (C) 2003-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.

   In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
   is a good match for most SIMD operations.  However the horizontal
   add/sub operations require the data pairs to be permuted into
   separate registers with vertical even/odd alignment for the operation.
   And the addsub operation requires the sign of only the even-numbered
   elements to be flipped (XORed with -0.0).
   For larger blocks of code using these intrinsic implementations,
   the compiler should be able to schedule instructions to avoid
   additional latency.

   In the specific case of the monitor and mwait instructions, there is
   no direct equivalent in the PowerISA at this time.  So those
   intrinsics are not implemented.  */
#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this warning."
#endif
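
/* Illustrative notes (added commentary, not part of the original header):
   to accept this header when porting x86 sources, define the macro at
   compile time, e.g. a hypothetical invocation:

       gcc -mcpu=power8 -DNO_WARN_X86_INTRINSICS -c foo.c

   The addsub sign flip described above works as follows for the
   two-element double case:

       even_n0               = { -0.0,    0.0 }
       __Y ^ even_n0         = { -Y0,     Y1 }
       __X + (__Y ^ even_n0) = { X0 - Y0, X1 + Y1 }

   which is exactly the SSE3 _mm_addsub_pd result.  */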

#ifndef _PMMINTRIN_H_INCLUDED
#define _PMMINTRIN_H_INCLUDED

/* We need definitions from the SSE2 and SSE header files.  */
#include <emmintrin.h>

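/* Alternately subtract and add:
   result = { __X[0] - __Y[0], __X[1] + __Y[1], __X[2] - __Y[2], __X[3] + __Y[3] }.
   The even elements of __Y are negated by XORing with -0.0, then a single
   vector add is used.  */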
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
  const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0};
  __v4sf even_neg_Y = vec_xor (__Y, even_n0);
  return (__m128) vec_add (__X, even_neg_Y);
}

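/* Alternately subtract and add: result = { __X[0] - __Y[0], __X[1] + __Y[1] },
   using the same sign-flip-then-add approach as the float variant.  */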
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
  const __v2df even_n0 = {-0.0, 0.0};
  __v2df even_neg_Y = vec_xor (__Y, even_n0);
  return (__m128d) vec_add (__X, even_neg_Y);
}

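/* Horizontal add:
   result = { __X[0] + __X[1], __X[2] + __X[3], __Y[0] + __Y[1], __Y[2] + __Y[3] }.
   The two permutes gather the element pairs into matching lanes so a
   single vertical add suffices, as described in the header comment.  */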
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
  __vector unsigned char xform2 = {
#ifdef __LITTLE_ENDIAN__
      0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
      0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B
#elif __BIG_ENDIAN__
      0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F,
      0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F
#endif
    };
  __vector unsigned char xform1 = {
#ifdef __LITTLE_ENDIAN__
      0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
      0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
#elif __BIG_ENDIAN__
      0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B,
      0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B
#endif
    };
  return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
                           vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
}

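/* Horizontal subtract:
   result = { __X[0] - __X[1], __X[2] - __X[3], __Y[0] - __Y[1], __Y[2] - __Y[3] },
   using the same permutes as _mm_hadd_ps with a vertical subtract.  */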
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
  __vector unsigned char xform2 = {
#ifdef __LITTLE_ENDIAN__
      0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
      0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B
#elif __BIG_ENDIAN__
      0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F,
      0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F
#endif
    };
  __vector unsigned char xform1 = {
#ifdef __LITTLE_ENDIAN__
      0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
      0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
#elif __BIG_ENDIAN__
      0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B,
      0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B
#endif
    };
  return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
                           vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
}

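/* Horizontal add: result = { __X[0] + __X[1], __Y[0] + __Y[1] };
   vec_mergeh/vec_mergel pair the high and low doubles directly.  */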
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df) __Y),
                            vec_mergel ((__v2df) __X, (__v2df) __Y));
}

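/* Horizontal subtract: result = { __X[0] - __X[1], __Y[0] - __Y[1] }.  */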
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df) __Y),
                            vec_mergel ((__v2df) __X, (__v2df) __Y));
}

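/* Duplicate the odd-indexed elements:
   result = { __X[1], __X[1], __X[3], __X[3] }.  */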
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
  return (__m128) vec_mergeo ((__v4su) __X, (__v4su) __X);
}

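/* Duplicate the even-indexed elements:
   result = { __X[0], __X[0], __X[2], __X[2] }.  */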
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
  return (__m128) vec_mergee ((__v4su) __X, (__v4su) __X);
}

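/* Load a double and splat it to both elements: result = { *__P, *__P }.  */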
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
  return (__m128d) vec_splats (*__P);
}

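/* Duplicate the low double: result = { __X[0], __X[0] }.  */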
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
  return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0, 0));
}

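/* Load 128 bits from a possibly unaligned address; vec_vsx_ld tolerates
   the misalignment, so no special "lddqu" handling is needed.  */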
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
  return (__m128i) (vec_vsx_ld (0, (signed int const *) __P));
}

/* POWER8 / POWER9 have no equivalent for _mm_monitor or _mm_mwait.  */

#endif /* _PMMINTRIN_H_INCLUDED */