diff gcc/config/rs6000/xmmintrin.h @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
--- a/gcc/config/rs6000/xmmintrin.h	Fri Oct 27 22:46:09 2017 +0900
+++ b/gcc/config/rs6000/xmmintrin.h	Thu Oct 25 07:37:49 2018 +0900
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -51,13 +51,28 @@
    the PowerISA FPSCR / VSCR registers. We recommend applications
    replace direct access to the MXSCR with the more portable <fenv.h>
    Posix APIs. */
-#warning "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this warning."
+#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
 #endif
 
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
+/* Define four value permute mask */
+#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
+
 #include <altivec.h>
+
+/* Avoid collisions between altivec.h and strict adherence to C++ and
+   C11 standards.  This should eventually be done inside altivec.h itself,
+   but only after testing a full distro build.  */
+#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
+				 (defined(__STDC_VERSION__) &&	\
+				  __STDC_VERSION__ >= 201112L))
+#undef vector
+#undef pixel
+#undef bool
+#endif
+
 #include <assert.h>
 
 /* We need type definitions from the MMX header file.  */
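Usage note, not part of the patch: _MM_SHUFFLE packs four 2-bit lane selectors into the 8-bit immediate that shuffle intrinsics such as _mm_shuffle_ps expect, and the strict-ANSI #undef block above is also why later hunks spell the AltiVec keywords as __vector/__bool.  A minimal sketch, assuming -DNO_WARN_X86_INTRINSICS since the header now hard-errors without it:

    #include <xmmintrin.h>

    /* Reverse the four lanes: result[i] = v[3 - i].
       _MM_SHUFFLE (0, 1, 2, 3) == 0x1B.  */
    __m128
    reverse_lanes (__m128 v)
    {
      return _mm_shuffle_ps (v, v, _MM_SHUFFLE (0, 1, 2, 3));
    }
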
@@ -70,6 +85,10 @@
    vector types, and their scalar components.  */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
 
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
+				       __aligned__ (1)));
+
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
@@ -157,7 +176,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  *(__m128 *)__P = __A;
+  *(__m128_u *)__P = __A;
 }
 
 /* Store four SPFP values in reverse order.  The address must be aligned.  */
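Why the unaligned store goes through __m128_u (my reading of the change): __aligned__ (1) drops the 16-byte alignment assumption carried by __m128, so the dereference in _mm_storeu_ps is compiled as an unaligned-safe vector store.  A minimal sketch of the idea, with hypothetical type names:

    /* Same 16-byte vector; the second typedef promises no alignment.  */
    typedef float v4sf_a __attribute__ ((__vector_size__ (16), __may_alias__));
    typedef float v4sf_u __attribute__ ((__vector_size__ (16), __may_alias__,
                                         __aligned__ (1)));

    void
    store_unaligned (float *p, v4sf_a v)
    {
      *(v4sf_u *) p = v;   /* Safe even when p is not 16-byte aligned.  */
    }
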
@@ -438,13 +457,15 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
 }
 
 /* Perform logical bit-wise operations on 128-bit values.  */
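The rewrite of _mm_min_ps/_mm_max_ps above restores the asymmetric x86 semantics: minps/maxps return the second source operand when the comparison is unordered (a NaN is present), which plain vec_min/vec_max do not guarantee.  Per lane, the compare-and-select sequence behaves roughly like the sketch below (illustration only, not part of the patch):

    /* x86-style minimum: the compare is false for NaN, so b is returned.  */
    static inline float
    x86_min_like (float a, float b)
    {
      return (b > a) ? a : b;   /* matches vec_sel (b, a, vcmpgtfp (b, a)) */
    }
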
@@ -968,12 +989,12 @@
 {
   /* Splat two lower SPFP values to both halves.  */
   __v4sf temp, rounded;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   rounded = vec_rint(temp);
-  result = (__vector __m64) vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
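A note on the recurring __vector __m64 -> __vector unsigned long long rewrite in this and the following hunks (hedged, my reading): __m64 is a typedef in the companion mmintrin.h, reportedly an unsigned long long carrying an alignment attribute, and an attributed typedef is not a reliable AltiVec vector element type; the plain element type keeps the same 2 x 64-bit layout.  Roughly:

    /* Hypothetical names; the real __m64 typedef lives in mmintrin.h.  */
    typedef __attribute__ ((__aligned__ (8))) unsigned long long my_m64;

    /* Spelling the vector with the underlying element type avoids putting
       an attributed typedef inside a vector type; layout is unchanged.  */
    typedef __vector unsigned long long pair_of_64;
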
@@ -1026,11 +1047,11 @@
 _mm_cvttps_pi32 (__m128 __A)
 {
   __v4sf temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector __m64) vec_cts (temp, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
@@ -1086,8 +1107,9 @@
   vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
-  return ((__m128) (__vector __m64)
-    { ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]});
+  return ((__m128) (__vector unsigned long long)
+    { ((__vector unsigned long long)vf1) [0],
+	((__vector unsigned long long)__A) [1]});
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1184,11 +1206,11 @@
 {
   __v4sf rounded;
   __vector signed int temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   rounded = vec_rint(__A);
   temp = vec_cts (rounded, 0);
-  result = (__vector __m64) vec_pack (temp, temp);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
@@ -1265,8 +1287,8 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadh_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [1] = __p [1];
 
   return (__m128)__a;
@@ -1276,7 +1298,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeh_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[1];
 }
@@ -1285,14 +1307,16 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movehl_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergel ((__vector __m64)__B, (__vector __m64)__A);
+  return (__m128) vec_mergel ((__vector unsigned long long)__B,
+			      (__vector unsigned long long)__A);
 }
 
 /* Moves the lower two values of B into the upper two values of A.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movelh_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergeh ((__vector __m64)__A, (__vector __m64)__B);
+  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
+			      (__vector unsigned long long)__B);
 }
 
 /* Sets the lower two SPFP values with 64-bits of data loaded from P;
@@ -1300,8 +1324,8 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadl_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [0] = __p [0];
 
   return (__m128)__a;
@@ -1311,7 +1335,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storel_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[0];
 }
@@ -1323,7 +1347,7 @@
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_ps (__m128  __A)
 {
-  __vector __m64 result;
+  __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
     {
 #ifdef __LITTLE_ENDIAN__
@@ -1333,8 +1357,9 @@
 #endif
     };
 
-  result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A,
-					 (__vector unsigned char) perm_mask);
+  result = ((__vector unsigned long long)
+	    vec_vbpermq ((__vector unsigned char) __A,
+			 (__vector unsigned char) perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
   return result[1];
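How the new movemask sequence works, as I understand vbpermq: each byte of perm_mask names one bit position in __A, here the sign bit of each 32-bit lane, and vec_vbpermq gathers the selected bits into one halfword of the result, so _mm_movemask_ps returns a 4-bit mask with bit i set when element i has its sign bit set.  For example:

    #include <xmmintrin.h>

    /* Expected: { -1.0f, 2.0f, -0.0f, 4.0f } -> 0b0101 == 5
       (elements 0 and 2 carry the sign bit).  */
    int
    sign_mask (__m128 v)
    {
      return _mm_movemask_ps (v);
    }
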
@@ -1398,13 +1423,13 @@
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmpgt (a, b);
+  c = (__vector __bool short)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
@@ -1436,13 +1461,13 @@
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmpgt (a, b);
+  c = (__vector __bool char)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1472,13 +1497,13 @@
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmplt (a, b);
+  c = (__vector __bool short)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
@@ -1510,13 +1535,13 @@
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmplt (a, b);
+  c = (__vector __bool char)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
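The bool -> __bool change in the four hunks above follows from the strict-ANSI #undef block near the top of the header: once the bool convenience macro from altivec.h may be undefined, only the __bool spelling of the AltiVec boolean vector type is guaranteed to work; __int128_t is likewise replaced by the plain __int128 built-in spelling, presumably for consistency.  A small illustration:

    #include <altivec.h>

    /* Always valid once altivec.h is included, with or without the
       convenience macros:  */
    __vector __bool short cmp_result;

    /* Only valid while the "bool" macro from altivec.h is still defined:
       __vector bool short cmp_result2;  */
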
@@ -1602,7 +1627,7 @@
 #endif
     };
   __m64_union t;
-  __vector __m64 a, p, r;
+  __vector unsigned long long a, p, r;
 
 #ifdef __LITTLE_ENDIAN__
   t.as_short[0] = permute_selectors[element_selector_10];