diff gcc/config/i386/xmmintrin.h @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
--- a/gcc/config/i386/xmmintrin.h	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/config/i386/xmmintrin.h	Fri Oct 27 22:46:09 2017 +0900
@@ -1,5 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -28,20 +27,50 @@
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
-#ifndef __SSE__
-# error "SSE instruction set not enabled"
-#else
-
 /* We need type definitions from the MMX header file.  */
 #include <mmintrin.h>
 
 /* Get _mm_malloc () and _mm_free ().  */
 #include <mm_malloc.h>
 
+/* Constants for use with _mm_prefetch.  */
+enum _mm_hint
+{
+  /* _MM_HINT_ET is _MM_HINT_T with the 3rd bit set.  */
+  _MM_HINT_ET0 = 7,
+  _MM_HINT_ET1 = 6,
+  _MM_HINT_T0 = 3,
+  _MM_HINT_T1 = 2,
+  _MM_HINT_T2 = 1,
+  _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation.  */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+}
+#else
+#define _mm_prefetch(P, I) \
+  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
+#endif
+
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __DISABLE_SSE__
+#endif /* __SSE__ */
+
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components.  */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
 
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
@@ -49,15 +78,6 @@
 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
  (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
 
-/* Constants for use with _mm_prefetch.  */
-enum _mm_hint
-{
-  _MM_HINT_T0 = 3,
-  _MM_HINT_T1 = 2,
-  _MM_HINT_T2 = 1,
-  _MM_HINT_NTA = 0
-};
-
 /* Bits in the MXCSR.  */
 #define _MM_EXCEPT_MASK       0x003f
 #define _MM_EXCEPT_INVALID    0x0001
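The old copy of the _mm_hint enum is deleted here because it now appears before the prefetch code above; the surrounding context also shows the _MM_SHUFFLE macro, which packs four 2-bit lane selectors into a single shuffle immediate. A small sketch of how it is typically used with _mm_shuffle_ps (defined later in this header); reverse_lanes is a hypothetical name.

/* _MM_SHUFFLE(3,2,1,0) == 0xE4 keeps the lanes in place;
   _MM_SHUFFLE(0,1,2,3) == 0x1B reverses them.  */
static __m128
reverse_lanes (__m128 v)
{
  return _mm_shuffle_ps (v, v, _MM_SHUFFLE (0, 1, 2, 3));
}
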
@@ -85,6 +105,14 @@
 #define _MM_FLUSH_ZERO_ON     0x8000
 #define _MM_FLUSH_ZERO_OFF    0x0000
 
+/* Create an undefined vector.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+  __m128 __Y = __Y;
+  return __Y;
+}
+
 /* Create a vector of zeros.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_setzero_ps (void)
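The newly added _mm_undefined_ps returns a deliberately uninitialized vector; the self-initialization __m128 __Y = __Y silences GCC's uninitialized-use warning. It is useful when every lane that matters is overwritten anyway, so a zeroing xorps would only add work. A minimal sketch using the _mm_set_ss and _mm_move_ss intrinsics from this header; fill_lane0 is a hypothetical helper.

static __m128
fill_lane0 (float x)
{
  __m128 v = _mm_undefined_ps ();           /* contents unspecified, no xorps emitted */
  return _mm_move_ss (v, _mm_set_ss (x));   /* lane 0 = x, lanes 1-3 stay undefined */
}
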
@@ -155,25 +183,25 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A + (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A - (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A * (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_div_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A / (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
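The arithmetic intrinsics now use GCC's generic vector operators instead of __builtin_ia32_* calls, so the middle end can constant-fold and combine them like ordinary expressions while still emitting addps/subps/mulps/divps when SSE is enabled. A standalone sketch of the same idiom; the type and function names are illustrative only.

typedef float v4sf __attribute__ ((__vector_size__ (16)));

/* Element-wise vector arithmetic; with SSE enabled this normally
   lowers to mulps followed by addps.  */
static v4sf
mul_add (v4sf a, v4sf b, v4sf c)
{
  return a * b + c;
}
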
@@ -896,14 +924,14 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_load_ps (float const *__P)
 {
-  return (__m128) *(__v4sf *)__P;
+  return *(__m128 *)__P;
 }
 
 /* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadu_ps (float const *__P)
 {
-  return (__m128) __builtin_ia32_loadups (__P);
+  return *(__m128_u *)__P;
 }
 
 /* Load four SPFP values in reverse order.  The address must be aligned.  */
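_mm_loadu_ps now dereferences the __m128_u type declared earlier; its __aligned__ (1) attribute tells the compiler the address may be misaligned, so an unaligned load (movups) is emitted without needing __builtin_ia32_loadups. A short usage sketch contrasting it with the aligned load; sum_two is a hypothetical helper.

static __m128
sum_two (const float *base)
{
  __m128 lo = _mm_loadu_ps (base);       /* base may have any alignment */
  __m128 hi = _mm_loadu_ps (base + 4);
  return _mm_add_ps (lo, hi);            /* _mm_load_ps would require 16-byte alignment */
}
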
@@ -932,27 +960,27 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_ss (float *__P, __m128 __A)
 {
-  *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+  *__P = ((__v4sf)__A)[0];
 }
 
 extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_f32 (__m128 __A)
 {
-  return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+  return ((__v4sf)__A)[0];
 }
 
 /* Store four SPFP values.  The address must be 16-byte aligned.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_ps (float *__P, __m128 __A)
 {
-  *(__v4sf *)__P = (__v4sf)__A;
+  *(__m128 *)__P = __A;
 }
 
 /* Store four SPFP values.  The address need not be 16-byte aligned.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  __builtin_ia32_storeups (__P, (__v4sf)__A);
+  *(__m128_u *)__P = __A;
 }
 
 /* Store the lower SPFP value across four words.  */
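_mm_store_ss and _mm_cvtss_f32 now read lane 0 with GCC's vector subscripting, ((__v4sf)__A)[0], instead of __builtin_ia32_vec_ext_v4sf; a vector element can be indexed like an array element. A small sketch using the internal __v4sf typedef from this header; hsum_ps is a hypothetical helper.

/* Horizontal sum of all four lanes via vector subscripting.  */
static float
hsum_ps (__m128 v)
{
  __v4sf f = (__v4sf) v;
  return f[0] + f[1] + f[2] + f[3];
}
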
@@ -1184,19 +1212,6 @@
   return _mm_sad_pu8 (__A, __B);
 }
 
-/* Loads one cache line from address P to a location "closer" to the
-   processor.  The selector I specifies the type of prefetch operation.  */
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
-  __builtin_prefetch (__P, 0, __I);
-}
-#else
-#define _mm_prefetch(P, I) \
-  __builtin_prefetch ((P), 0, (I))
-#endif
-
 /* Stores the data in A to the address P without polluting the caches.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_stream_pi (__m64 *__P, __m64 __A)
@@ -1219,15 +1234,6 @@
   __builtin_ia32_sfence ();
 }
 
-/* The execution of the next instruction is delayed by an implementation
-   specific amount of time.  The instruction does not modify the
-   architectural state.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_pause (void)
-{
-  __asm__ __volatile__ ("rep; nop" : : );
-}
-
 /* Transpose the 4x4 matrix composed of row[0-3].  */
 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
 do {									\
@@ -1243,9 +1249,22 @@
 } while (0)
 
 /* For backward source compatibility.  */
-#ifdef __SSE2__
 # include <emmintrin.h>
-#endif
+
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE__ */
 
-#endif /* __SSE__ */
+/* The execution of the next instruction is delayed by an implementation
+   specific amount of time.  The instruction does not modify the
+   architectural state.  This is after the pop_options pragma because
+   it does not require SSE support in the processor--the encoding is a
+   nop on processors that do not support it.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+  __builtin_ia32_pause ();
+}
+
 #endif /* _XMMINTRIN_H_INCLUDED */
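
With the #error check replaced by #pragma GCC push_options / target("sse") / pop_options, the header can be included in a translation unit built without -msse; individual functions can then opt in with the target attribute, and _mm_pause (defined after the pop_options because it needs no SSE support) stays usable everywhere. A minimal sketch under those assumptions; both function names are hypothetical.

#include <xmmintrin.h>

__attribute__ ((target ("sse")))
static float
sum_first (const float *p)          /* SSE enabled just for this function */
{
  __m128 v = _mm_loadu_ps (p);
  return _mm_cvtss_f32 (_mm_add_ps (v, v));
}

static void
spin_until (volatile int *flag)     /* compiles even without -msse */
{
  while (!*flag)
    _mm_pause ();                   /* encoded as rep; nop, a plain nop on older CPUs */
}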