comparison gcc/config/rs6000/xmmintrin.h @ 131:84e7813d76e9 (gcc-8.2)

| author | mir3636 |
|---|---|
| date | Thu, 25 Oct 2018 07:37:49 +0900 |
| parents | 04ced10e8804 |
| children | 1830386684a0 |
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Copyright (C) 2002-2017 Free Software Foundation, Inc. | 1 /* Copyright (C) 2002-2018 Free Software Foundation, Inc. |
2 | 2 |
3 This file is part of GCC. | 3 This file is part of GCC. |
4 | 4 |
5 GCC is free software; you can redistribute it and/or modify | 5 GCC is free software; you can redistribute it and/or modify |
6 it under the terms of the GNU General Public License as published by | 6 it under the terms of the GNU General Public License as published by |
49 | 49 |
50 Another difference is the format and details of the X86_64 MXSCR vs | 50 Another difference is the format and details of the X86_64 MXSCR vs |
51 the PowerISA FPSCR / VSCR registers. We recommend applications | 51 the PowerISA FPSCR / VSCR registers. We recommend applications |
52 replace direct access to the MXSCR with the more portable <fenv.h> | 52 replace direct access to the MXSCR with the more portable <fenv.h> |
53 Posix APIs. */ | 53 Posix APIs. */ |
54 #warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." | 54 #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." |
55 #endif | 55 #endif |
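
The changeset hardens the port's compatibility notice from a `#warning` into a hard `#error`, so PowerPC code must now explicitly opt in before including this header. A minimal usage sketch, using the macro named in the message above (`-DNO_WARN_X86_INTRINSICS` on the command line has the same effect):

```c
/* Acknowledge the PowerPC implementation of the x86 intrinsics
   headers before including them.  */
#define NO_WARN_X86_INTRINSICS 1
#include <xmmintrin.h>
```
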
56 | 56 |
57 #ifndef _XMMINTRIN_H_INCLUDED | 57 #ifndef _XMMINTRIN_H_INCLUDED |
58 #define _XMMINTRIN_H_INCLUDED | 58 #define _XMMINTRIN_H_INCLUDED |
59 | 59 |
60 /* Define four value permute mask */ | |
61 #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) | |
62 | |
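
The `_MM_SHUFFLE` macro, defined here ahead of the `altivec.h` include, packs four 2-bit element selectors into one byte (`w` in bits 7:6 down to `z` in bits 1:0). A quick worked check:

```c
/* (3<<6)|(2<<4)|(1<<2)|0 = 0xE4 is the identity permute;
   reversing the selectors gives 0x1B.  */
_Static_assert (_MM_SHUFFLE (3, 2, 1, 0) == 0xE4, "identity");
_Static_assert (_MM_SHUFFLE (0, 1, 2, 3) == 0x1B, "reversal");
```
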
60 #include <altivec.h> | 63 #include <altivec.h> |
64 | |
65 /* Avoid collisions between altivec.h and strict adherence to C++ and | |
66 C11 standards. This should eventually be done inside altivec.h itself, | |
67 but only after testing a full distro build. */ | |
68 #if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \ | |
69 (defined(__STDC_VERSION__) && \ | |
70 __STDC_VERSION__ >= 201112L)) | |
71 #undef vector | |
72 #undef pixel | |
73 #undef bool | |
74 #endif | |
75 | |
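
The new guard removes altivec.h's context-sensitive keywords under strict C++ or C11 modes, where they would collide with standard identifiers; the double-underscore spellings remain available. A sketch of the distinction, assuming `-std=c11`:

```c
/* vector bool int v;  -- rejected here: 'vector' and 'bool' are
   #undef'd under __STRICT_ANSI__ to avoid clashing with the
   standard names.  */
__vector __bool int v;   /* always-valid AltiVec alternate spelling */
```
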
61 #include <assert.h> | 76 #include <assert.h> |
62 | 77 |
63 /* We need type definitions from the MMX header file. */ | 78 /* We need type definitions from the MMX header file. */ |
64 #include <mmintrin.h> | 79 #include <mmintrin.h> |
65 | 80 |
67 #include <mm_malloc.h> | 82 #include <mm_malloc.h> |
68 | 83 |
69 /* The Intel API is flexible enough that we must allow aliasing with other | 84 /* The Intel API is flexible enough that we must allow aliasing with other |
70 vector types, and their scalar components. */ | 85 vector types, and their scalar components. */ |
71 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); | 86 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); |
87 | |
88 /* Unaligned version of the same type. */ | |
89 typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, | |
90 __aligned__ (1))); | |
72 | 91 |
73 /* Internal data types for implementing the intrinsics. */ | 92 /* Internal data types for implementing the intrinsics. */ |
74 typedef float __v4sf __attribute__ ((__vector_size__ (16))); | 93 typedef float __v4sf __attribute__ ((__vector_size__ (16))); |
75 | 94 |
76 /* Create an undefined vector. */ | 95 /* Create an undefined vector. */ |
155 | 174 |
156 /* Store four SPFP values. The address need not be 16-byte aligned. */ | 175 /* Store four SPFP values. The address need not be 16-byte aligned. */ |
157 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 176 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
158 _mm_storeu_ps (float *__P, __m128 __A) | 177 _mm_storeu_ps (float *__P, __m128 __A) |
159 { | 178 { |
160 *(__m128 *)__P = __A; | 179 *(__m128_u *)__P = __A; |
161 } | 180 } |
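
`_mm_storeu_ps` now stores through the new `__m128_u` typedef, whose `__aligned__ (1)` attribute tells the compiler not to assume 16-byte alignment, making the plain dereference well-defined for any `float` pointer. A minimal usage sketch:

```c
float buf[5];                   /* no 16-byte alignment guarantee */
__m128 v = _mm_set1_ps (1.0f);
_mm_storeu_ps (buf + 1, v);     /* buf+1 is only 4-byte aligned */
```
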
162 | 181 |
163 /* Store four SPFP values in reverse order. The address must be aligned. */ | 182 /* Store four SPFP values in reverse order. The address must be aligned. */ |
164 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 183 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
165 _mm_storer_ps (float *__P, __m128 __A) | 184 _mm_storer_ps (float *__P, __m128 __A) |
436 } | 455 } |
437 | 456 |
438 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 457 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
439 _mm_min_ps (__m128 __A, __m128 __B) | 458 _mm_min_ps (__m128 __A, __m128 __B) |
440 { | 459 { |
441 return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B)); | 460 __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A); |
461 return vec_sel (__B, __A, m); | |
442 } | 462 } |
443 | 463 |
444 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 464 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
445 _mm_max_ps (__m128 __A, __m128 __B) | 465 _mm_max_ps (__m128 __A, __m128 __B) |
446 { | 466 { |
447 return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B)); | 467 __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B); |
468 return vec_sel (__B, __A, m); | |
448 } | 469 } |
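
`_mm_min_ps` and `_mm_max_ps` are rewritten from `vec_min`/`vec_max` into an explicit compare-and-select so that NaN handling matches the x86 MINPS/MAXPS rule of returning the second operand when the comparison is unordered. A behavior sketch:

```c
__m128 a = _mm_set1_ps (__builtin_nanf (""));
__m128 b = _mm_set1_ps (2.0f);
__m128 r = _mm_min_ps (a, b);   /* vcmpgtfp is false for NaN, so the
                                   select yields b, matching MINPS */
```
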
449 | 470 |
450 /* Perform logical bit-wise operations on 128-bit values. */ | 471 /* Perform logical bit-wise operations on 128-bit values. */ |
451 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 472 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
452 _mm_and_ps (__m128 __A, __m128 __B) | 473 _mm_and_ps (__m128 __A, __m128 __B) |
966 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 987 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
967 _mm_cvtps_pi32 (__m128 __A) | 988 _mm_cvtps_pi32 (__m128 __A) |
968 { | 989 { |
969 /* Splat two lower SPFP values to both halves. */ | 990 /* Splat two lower SPFP values to both halves. */ |
970 __v4sf temp, rounded; | 991 __v4sf temp, rounded; |
971 __vector __m64 result; | 992 __vector unsigned long long result; |
972 | 993 |
973 /* Splat two lower SPFP values to both halves. */ | 994 /* Splat two lower SPFP values to both halves. */ |
974 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); | 995 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); |
975 rounded = vec_rint(temp); | 996 rounded = vec_rint(temp); |
976 result = (__vector __m64) vec_cts (rounded, 0); | 997 result = (__vector unsigned long long) vec_cts (rounded, 0); |
977 | 998 |
978 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 999 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
979 } | 1000 } |
980 | 1001 |
981 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1002 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1024 integers in packed form. */ | 1045 integers in packed form. */ |
1025 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1046 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1026 _mm_cvttps_pi32 (__m128 __A) | 1047 _mm_cvttps_pi32 (__m128 __A) |
1027 { | 1048 { |
1028 __v4sf temp; | 1049 __v4sf temp; |
1029 __vector __m64 result; | 1050 __vector unsigned long long result; |
1030 | 1051 |
1031 /* Splat two lower SPFP values to both halves. */ | 1052 /* Splat two lower SPFP values to both halves. */ |
1032 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); | 1053 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); |
1033 result = (__vector __m64) vec_cts (temp, 0); | 1054 result = (__vector unsigned long long) vec_cts (temp, 0); |
1034 | 1055 |
1035 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 1056 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
1036 } | 1057 } |
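
The two conversion hunks differ only in rounding: `_mm_cvtps_pi32` calls `vec_rint` first (round to nearest, ties to even in the default mode), while `_mm_cvttps_pi32` lets `vec_cts` truncate toward zero. Assuming the default rounding mode:

```c
__m128 v = _mm_setr_ps (1.7f, -1.7f, 0.0f, 0.0f);
__m64 r = _mm_cvtps_pi32 (v);    /* lower two lanes -> { 2, -2 } */
__m64 t = _mm_cvttps_pi32 (v);   /* lower two lanes -> { 1, -1 } */
```
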
1037 | 1058 |
1038 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1059 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1084 __vector float vf1; | 1105 __vector float vf1; |
1085 | 1106 |
1086 vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B); | 1107 vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B); |
1087 vf1 = (__vector float) vec_ctf (vm1, 0); | 1108 vf1 = (__vector float) vec_ctf (vm1, 0); |
1088 | 1109 |
1089 return ((__m128) (__vector __m64) | 1110 return ((__m128) (__vector unsigned long long) |
1090 { ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]}); | 1111 { ((__vector unsigned long long)vf1) [0], |
1112 ((__vector unsigned long long)__A) [1]}); | |
1091 } | 1113 } |
1092 | 1114 |
1093 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1115 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1094 _mm_cvt_pi2ps (__m128 __A, __m64 __B) | 1116 _mm_cvt_pi2ps (__m128 __A, __m64 __B) |
1095 { | 1117 { |
1182 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1204 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1183 _mm_cvtps_pi16(__m128 __A) | 1205 _mm_cvtps_pi16(__m128 __A) |
1184 { | 1206 { |
1185 __v4sf rounded; | 1207 __v4sf rounded; |
1186 __vector signed int temp; | 1208 __vector signed int temp; |
1187 __vector __m64 result; | 1209 __vector unsigned long long result; |
1188 | 1210 |
1189 rounded = vec_rint(__A); | 1211 rounded = vec_rint(__A); |
1190 temp = vec_cts (rounded, 0); | 1212 temp = vec_cts (rounded, 0); |
1191 result = (__vector __m64) vec_pack (temp, temp); | 1213 result = (__vector unsigned long long) vec_pack (temp, temp); |
1192 | 1214 |
1193 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 1215 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
1194 } | 1216 } |
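
`_mm_cvtps_pi16` rounds the four floats, converts them to 32-bit integers, then narrows with `vec_pack` to four signed shorts in the returned `__m64`. For example, assuming the default rounding mode:

```c
__m128 v = _mm_setr_ps (1.5f, -2.5f, 3.0f, -4.0f);
__m64 p = _mm_cvtps_pi16 (v);   /* { 2, -2, 3, -4 }: ties round to even */
```
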
1195 | 1217 |
1196 /* Convert the four SPFP values in A to four signed 8-bit integers. */ | 1218 /* Convert the four SPFP values in A to four signed 8-bit integers. */ |
1263 /* Sets the upper two SPFP values with 64-bits of data loaded from P; | 1285 /* Sets the upper two SPFP values with 64-bits of data loaded from P; |
1264 the lower two values are passed through from A. */ | 1286 the lower two values are passed through from A. */ |
1265 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1287 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1266 _mm_loadh_pi (__m128 __A, __m64 const *__P) | 1288 _mm_loadh_pi (__m128 __A, __m64 const *__P) |
1267 { | 1289 { |
1268 __vector __m64 __a = (__vector __m64)__A; | 1290 __vector unsigned long long __a = (__vector unsigned long long)__A; |
1269 __vector __m64 __p = vec_splats(*__P); | 1291 __vector unsigned long long __p = vec_splats(*__P); |
1270 __a [1] = __p [1]; | 1292 __a [1] = __p [1]; |
1271 | 1293 |
1272 return (__m128)__a; | 1294 return (__m128)__a; |
1273 } | 1295 } |
1274 | 1296 |
1275 /* Stores the upper two SPFP values of A into P. */ | 1297 /* Stores the upper two SPFP values of A into P. */ |
1276 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1298 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1277 _mm_storeh_pi (__m64 *__P, __m128 __A) | 1299 _mm_storeh_pi (__m64 *__P, __m128 __A) |
1278 { | 1300 { |
1279 __vector __m64 __a = (__vector __m64) __A; | 1301 __vector unsigned long long __a = (__vector unsigned long long) __A; |
1280 | 1302 |
1281 *__P = __a[1]; | 1303 *__P = __a[1]; |
1282 } | 1304 } |
1283 | 1305 |
1284 /* Moves the upper two values of B into the lower two values of A. */ | 1306 /* Moves the upper two values of B into the lower two values of A. */ |
1285 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1307 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1286 _mm_movehl_ps (__m128 __A, __m128 __B) | 1308 _mm_movehl_ps (__m128 __A, __m128 __B) |
1287 { | 1309 { |
1288 return (__m128) vec_mergel ((__vector __m64)__B, (__vector __m64)__A); | 1310 return (__m128) vec_mergel ((__vector unsigned long long)__B, |
1311 (__vector unsigned long long)__A); | |
1289 } | 1312 } |
1290 | 1313 |
1291 /* Moves the lower two values of B into the upper two values of A. */ | 1314 /* Moves the lower two values of B into the upper two values of A. */ |
1292 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1315 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1293 _mm_movelh_ps (__m128 __A, __m128 __B) | 1316 _mm_movelh_ps (__m128 __A, __m128 __B) |
1294 { | 1317 { |
1295 return (__m128) vec_mergeh ((__vector __m64)__A, (__vector __m64)__B); | 1318 return (__m128) vec_mergeh ((__vector unsigned long long)__A, |
1319 (__vector unsigned long long)__B); | |
1296 } | 1320 } |
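
With the `__vector __m64` casts replaced by `__vector unsigned long long`, the merges read directly as doubleword operations: `vec_mergel` takes the high doublewords of B then A, `vec_mergeh` the low doublewords of A then B. Writing lanes as indices:

```c
__m128 A = _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f);
__m128 B = _mm_setr_ps (4.0f, 5.0f, 6.0f, 7.0f);
__m128 hl = _mm_movehl_ps (A, B);  /* { 6, 7, 2, 3 } = { B2, B3, A2, A3 } */
__m128 lh = _mm_movelh_ps (A, B);  /* { 0, 1, 4, 5 } = { A0, A1, B0, B1 } */
```
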
1297 | 1321 |
1298 /* Sets the lower two SPFP values with 64-bits of data loaded from P; | 1322 /* Sets the lower two SPFP values with 64-bits of data loaded from P; |
1299 the upper two values are passed through from A. */ | 1323 the upper two values are passed through from A. */ |
1300 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1324 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1301 _mm_loadl_pi (__m128 __A, __m64 const *__P) | 1325 _mm_loadl_pi (__m128 __A, __m64 const *__P) |
1302 { | 1326 { |
1303 __vector __m64 __a = (__vector __m64)__A; | 1327 __vector unsigned long long __a = (__vector unsigned long long)__A; |
1304 __vector __m64 __p = vec_splats(*__P); | 1328 __vector unsigned long long __p = vec_splats(*__P); |
1305 __a [0] = __p [0]; | 1329 __a [0] = __p [0]; |
1306 | 1330 |
1307 return (__m128)__a; | 1331 return (__m128)__a; |
1308 } | 1332 } |
1309 | 1333 |
1310 /* Stores the lower two SPFP values of A into P. */ | 1334 /* Stores the lower two SPFP values of A into P. */ |
1311 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1335 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1312 _mm_storel_pi (__m64 *__P, __m128 __A) | 1336 _mm_storel_pi (__m64 *__P, __m128 __A) |
1313 { | 1337 { |
1314 __vector __m64 __a = (__vector __m64) __A; | 1338 __vector unsigned long long __a = (__vector unsigned long long) __A; |
1315 | 1339 |
1316 *__P = __a[0]; | 1340 *__P = __a[0]; |
1317 } | 1341 } |
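
The four 64-bit load/store halves above follow one pattern: view the `__m128` as two doublewords and replace or extract index 1 (high) or index 0 (low). A round-trip sketch:

```c
__m64 hi, lo;
__m128 v = _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f);
_mm_storeh_pi (&hi, v);       /* hi = { 2.0f, 3.0f } */
_mm_storel_pi (&lo, v);       /* lo = { 0.0f, 1.0f } */
v = _mm_loadh_pi (v, &hi);    /* upper lanes rewritten in place */
v = _mm_loadl_pi (v, &lo);    /* lower lanes likewise */
```
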
1318 | 1342 |
1319 #ifdef _ARCH_PWR8 | 1343 #ifdef _ARCH_PWR8 |
1321 | 1345 |
1322 /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ | 1346 /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ |
1323 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1347 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1324 _mm_movemask_ps (__m128 __A) | 1348 _mm_movemask_ps (__m128 __A) |
1325 { | 1349 { |
1326 __vector __m64 result; | 1350 __vector unsigned long long result; |
1327 static const __vector unsigned int perm_mask = | 1351 static const __vector unsigned int perm_mask = |
1328 { | 1352 { |
1329 #ifdef __LITTLE_ENDIAN__ | 1353 #ifdef __LITTLE_ENDIAN__ |
1330 0x00204060, 0x80808080, 0x80808080, 0x80808080 | 1354 0x00204060, 0x80808080, 0x80808080, 0x80808080 |
1331 #elif __BIG_ENDIAN__ | 1355 #elif __BIG_ENDIAN__ |
1332 0x80808080, 0x80808080, 0x80808080, 0x00204060 | 1356 0x80808080, 0x80808080, 0x80808080, 0x00204060 |
1333 #endif | 1357 #endif |
1334 }; | 1358 }; |
1335 | 1359 |
1336 result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A, | 1360 result = ((__vector unsigned long long) |
1337 (__vector unsigned char) perm_mask); | 1361 vec_vbpermq ((__vector unsigned char) __A, |
1362 (__vector unsigned char) perm_mask)); | |
1338 | 1363 |
1339 #ifdef __LITTLE_ENDIAN__ | 1364 #ifdef __LITTLE_ENDIAN__ |
1340 return result[1]; | 1365 return result[1]; |
1341 #elif __BIG_ENDIAN__ | 1366 #elif __BIG_ENDIAN__ |
1342 return result[0]; | 1367 return result[0]; |
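
`_mm_movemask_ps` gathers the four sign bits into bits 3:0 of an `int`. The `vec_vbpermq` control constant `0x00204060` selects bit indices 0x00, 0x20, 0x40, 0x60 (0, 32, 64, 96), i.e. the sign bit of each 32-bit lane. For example:

```c
__m128 v = _mm_setr_ps (-1.0f, 2.0f, -3.0f, 4.0f);
int m = _mm_movemask_ps (v);   /* lanes 0 and 2 negative -> 0b0101 = 5 */
```
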
1396 | 1421 |
1397 _mm_max_pi16 (__m64 __A, __m64 __B) | 1422 _mm_max_pi16 (__m64 __A, __m64 __B) |
1398 { | 1423 { |
1399 #if _ARCH_PWR8 | 1424 #if _ARCH_PWR8 |
1400 __vector signed short a, b, r; | 1425 __vector signed short a, b, r; |
1401 __vector bool short c; | 1426 __vector __bool short c; |
1402 | 1427 |
1403 a = (__vector signed short)vec_splats (__A); | 1428 a = (__vector signed short)vec_splats (__A); |
1404 b = (__vector signed short)vec_splats (__B); | 1429 b = (__vector signed short)vec_splats (__B); |
1405 c = (__vector bool short)vec_cmpgt (a, b); | 1430 c = (__vector __bool short)vec_cmpgt (a, b); |
1406 r = vec_sel (b, a, c); | 1431 r = vec_sel (b, a, c); |
1407 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1432 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1408 #else | 1433 #else |
1409 __m64_union m1, m2, res; | 1434 __m64_union m1, m2, res; |
1410 | 1435 |
1411 m1.as_m64 = __A; | 1436 m1.as_m64 = __A; |
1412 m2.as_m64 = __B; | 1437 m2.as_m64 = __B; |
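
This hunk and the three that follow only respell types: `__vector bool` becomes `__vector __bool` and `__int128_t` becomes `__int128`; the splat, compare, select, unpack logic is unchanged. Per-lane behavior, for reference:

```c
__m64 a = _mm_set_pi16 (1, -2, 3, -4);
__m64 b = _mm_set_pi16 (0, 5, -6, 7);
__m64 r = _mm_max_pi16 (a, b);   /* { 1, 5, 3, 7 }: signed 16-bit max */
```
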
1434 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1459 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1435 _mm_max_pu8 (__m64 __A, __m64 __B) | 1460 _mm_max_pu8 (__m64 __A, __m64 __B) |
1436 { | 1461 { |
1437 #if _ARCH_PWR8 | 1462 #if _ARCH_PWR8 |
1438 __vector unsigned char a, b, r; | 1463 __vector unsigned char a, b, r; |
1439 __vector bool char c; | 1464 __vector __bool char c; |
1440 | 1465 |
1441 a = (__vector unsigned char)vec_splats (__A); | 1466 a = (__vector unsigned char)vec_splats (__A); |
1442 b = (__vector unsigned char)vec_splats (__B); | 1467 b = (__vector unsigned char)vec_splats (__B); |
1443 c = (__vector bool char)vec_cmpgt (a, b); | 1468 c = (__vector __bool char)vec_cmpgt (a, b); |
1444 r = vec_sel (b, a, c); | 1469 r = vec_sel (b, a, c); |
1445 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1470 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1446 #else | 1471 #else |
1447 __m64_union m1, m2, res; | 1472 __m64_union m1, m2, res; |
1448 long i; | 1473 long i; |
1449 | 1474 |
1450 m1.as_m64 = __A; | 1475 m1.as_m64 = __A; |
1470 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1495 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1471 _mm_min_pi16 (__m64 __A, __m64 __B) | 1496 _mm_min_pi16 (__m64 __A, __m64 __B) |
1472 { | 1497 { |
1473 #if _ARCH_PWR8 | 1498 #if _ARCH_PWR8 |
1474 __vector signed short a, b, r; | 1499 __vector signed short a, b, r; |
1475 __vector bool short c; | 1500 __vector __bool short c; |
1476 | 1501 |
1477 a = (__vector signed short)vec_splats (__A); | 1502 a = (__vector signed short)vec_splats (__A); |
1478 b = (__vector signed short)vec_splats (__B); | 1503 b = (__vector signed short)vec_splats (__B); |
1479 c = (__vector bool short)vec_cmplt (a, b); | 1504 c = (__vector __bool short)vec_cmplt (a, b); |
1480 r = vec_sel (b, a, c); | 1505 r = vec_sel (b, a, c); |
1481 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1506 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1482 #else | 1507 #else |
1483 __m64_union m1, m2, res; | 1508 __m64_union m1, m2, res; |
1484 | 1509 |
1485 m1.as_m64 = __A; | 1510 m1.as_m64 = __A; |
1486 m2.as_m64 = __B; | 1511 m2.as_m64 = __B; |
1508 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1533 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1509 _mm_min_pu8 (__m64 __A, __m64 __B) | 1534 _mm_min_pu8 (__m64 __A, __m64 __B) |
1510 { | 1535 { |
1511 #if _ARCH_PWR8 | 1536 #if _ARCH_PWR8 |
1512 __vector unsigned char a, b, r; | 1537 __vector unsigned char a, b, r; |
1513 __vector bool char c; | 1538 __vector __bool char c; |
1514 | 1539 |
1515 a = (__vector unsigned char)vec_splats (__A); | 1540 a = (__vector unsigned char)vec_splats (__A); |
1516 b = (__vector unsigned char)vec_splats (__B); | 1541 b = (__vector unsigned char)vec_splats (__B); |
1517 c = (__vector bool char)vec_cmplt (a, b); | 1542 c = (__vector __bool char)vec_cmplt (a, b); |
1518 r = vec_sel (b, a, c); | 1543 r = vec_sel (b, a, c); |
1519 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1544 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1520 #else | 1545 #else |
1521 __m64_union m1, m2, res; | 1546 __m64_union m1, m2, res; |
1522 long i; | 1547 long i; |
1523 | 1548 |
1524 m1.as_m64 = __A; | 1549 m1.as_m64 = __A; |
1600 #elif __BIG_ENDIAN__ | 1625 #elif __BIG_ENDIAN__ |
1601 0x0607, 0x0405, 0x0203, 0x0001 | 1626 0x0607, 0x0405, 0x0203, 0x0001 |
1602 #endif | 1627 #endif |
1603 }; | 1628 }; |
1604 __m64_union t; | 1629 __m64_union t; |
1605 __vector __m64 a, p, r; | 1630 __vector unsigned long long a, p, r; |
1606 | 1631 |
1607 #ifdef __LITTLE_ENDIAN__ | 1632 #ifdef __LITTLE_ENDIAN__ |
1608 t.as_short[0] = permute_selectors[element_selector_10]; | 1633 t.as_short[0] = permute_selectors[element_selector_10]; |
1609 t.as_short[1] = permute_selectors[element_selector_32]; | 1634 t.as_short[1] = permute_selectors[element_selector_32]; |
1610 t.as_short[2] = permute_selectors[element_selector_54]; | 1635 t.as_short[2] = permute_selectors[element_selector_54]; |