/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
|
|
24
|
|
/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */
|
|
27
|
|
28 #ifndef _PMMINTRIN_H_INCLUDED
|
|
29 #define _PMMINTRIN_H_INCLUDED
|
|
30
|
|
31 #ifndef __SSE3__
|
|
32 # error "SSE3 instruction set not enabled"
|
|
33 #else
|
|
34
|
|
/* We need definitions from the SSE2 and SSE header files.  */
|
|
36 #include <emmintrin.h>
|
|
37
|
|
/* Additional bits in the MXCSR: the denormals-zero field.  The mask and
   the ON value are both 0x0040; OFF is the all-clear value.  */
#define _MM_DENORMALS_ZERO_MASK	0x0040
#define _MM_DENORMALS_ZERO_ON	0x0040
#define _MM_DENORMALS_ZERO_OFF	0x0000

/* Read the control/status word with _mm_getcsr, clear the bits covered by
   _MM_DENORMALS_ZERO_MASK, OR in __mode, and write the result back with
   _mm_setcsr.  */
#define _MM_SET_DENORMALS_ZERO_MODE(__mode) \
  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (__mode))

/* Yield the value of _mm_getcsr restricted to _MM_DENORMALS_ZERO_MASK.  */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr () & _MM_DENORMALS_ZERO_MASK)
|
|
47
|
|
48 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
49 _mm_addsub_ps (__m128 __X, __m128 __Y)
|
|
50 {
|
|
51 return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
|
|
52 }
|
|
53
|
|
54 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
55 _mm_hadd_ps (__m128 __X, __m128 __Y)
|
|
56 {
|
|
57 return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
|
|
58 }
|
|
59
|
|
60 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
61 _mm_hsub_ps (__m128 __X, __m128 __Y)
|
|
62 {
|
|
63 return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
|
|
64 }
|
|
65
|
|
66 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
67 _mm_movehdup_ps (__m128 __X)
|
|
68 {
|
|
69 return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
|
|
70 }
|
|
71
|
|
72 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
73 _mm_moveldup_ps (__m128 __X)
|
|
74 {
|
|
75 return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
|
|
76 }
|
|
77
|
|
78 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
79 _mm_addsub_pd (__m128d __X, __m128d __Y)
|
|
80 {
|
|
81 return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
|
|
82 }
|
|
83
|
|
84 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
85 _mm_hadd_pd (__m128d __X, __m128d __Y)
|
|
86 {
|
|
87 return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
|
|
88 }
|
|
89
|
|
90 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
91 _mm_hsub_pd (__m128d __X, __m128d __Y)
|
|
92 {
|
|
93 return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
|
|
94 }
|
|
95
|
|
/* Forward __P to _mm_load1_pd and return what it yields; this intrinsic
   adds no behaviour of its own on top of that call.  */
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
  __m128d __loaded = _mm_load1_pd (__P);

  return __loaded;
}
|
|
101
|
|
/* Shuffle __X with itself via _mm_shuffle_pd, using _MM_SHUFFLE2 (0,0) as
   the selector, and return the shuffled vector.  The selector is written
   inline as a constant expression, exactly as it is handed to
   _mm_shuffle_pd.
   NOTE(review): presumably this replicates the low double into both
   lanes; confirm against the _MM_SHUFFLE2 definition.  */
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
  __m128d __shuffled = _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));

  return __shuffled;
}
|
|
107
|
|
108 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
109 _mm_lddqu_si128 (__m128i const *__P)
|
|
110 {
|
|
111 return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
|
|
112 }
|
|
113
|
|
/* Hand __P, __E and __H, in that order, to __builtin_ia32_monitor.
   Nothing is returned.
   NOTE(review): presumably the MONITOR instruction; the meaning of __E
   and __H is not visible here, so check the Intel reference.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
  void const *__addr = __P;

  __builtin_ia32_monitor (__addr, __E, __H);
}
|
|
119
|
|
/* Hand __E and __H, in that order, to __builtin_ia32_mwait.  Nothing is
   returned.
   NOTE(review): presumably the MWAIT instruction; the meaning of __E and
   __H is not visible here, so check the Intel reference.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
  unsigned int __first = __E;
  unsigned int __second = __H;

  __builtin_ia32_mwait (__first, __second);
}
|
|
125
|
|
126 #endif /* __SSE3__ */
|
|
127
|
|
128 #endif /* _PMMINTRIN_H_INCLUDED */
|