diff gcc/config/rs6000/xmmintrin.h @ 131:84e7813d76e9
gcc-8.2
| author | mir3636 |
| --- | --- |
| date | Thu, 25 Oct 2018 07:37:49 +0900 |
| parents | 04ced10e8804 |
| children | 1830386684a0 |
```diff
--- a/gcc/config/rs6000/xmmintrin.h	Fri Oct 27 22:46:09 2017 +0900
+++ b/gcc/config/rs6000/xmmintrin.h	Thu Oct 25 07:37:49 2018 +0900
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -51,13 +51,28 @@
    the PowerISA FPSCR / VSCR registers. We recommend applications
    replace direct access to the MXSCR with the more portable <fenv.h>
    Posix APIs. */
-#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
+#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
 #endif
 
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
+/* Define four value permute mask */
+#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
+
 #include <altivec.h>
+
+/* Avoid collisions between altivec.h and strict adherence to C++ and
+   C11 standards.  This should eventually be done inside altivec.h itself,
+   but only after testing a full distro build.  */
+#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
+                                 (defined(__STDC_VERSION__) && \
+                                  __STDC_VERSION__ >= 201112L))
+#undef vector
+#undef pixel
+#undef bool
+#endif
+
 #include <assert.h>
 
 /* We need type definitions from the MMX header file.  */
@@ -70,6 +85,10 @@
    vector types, and their scalar components.  */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
 
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
+                                       __aligned__ (1)));
+
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
@@ -157,7 +176,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  *(__m128 *)__P = __A;
+  *(__m128_u *)__P = __A;
 }
 
 /* Store four SPFP values in reverse order.  The address must be aligned.  */
@@ -438,13 +457,15 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
 }
 
 /* Perform logical bit-wise operations on 128-bit values.  */
@@ -968,12 +989,12 @@
 {
   /* Splat two lower SPFP values to both halves.  */
   __v4sf temp, rounded;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   rounded = vec_rint(temp);
-  result = (__vector __m64) vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
@@ -1026,11 +1047,11 @@
 _mm_cvttps_pi32 (__m128 __A)
 {
   __v4sf temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves.  */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector __m64) vec_cts (temp, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
@@ -1086,8 +1107,9 @@
   vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
-  return ((__m128) (__vector __m64)
-	  { ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]});
+  return ((__m128) (__vector unsigned long long)
+	  { ((__vector unsigned long long)vf1) [0],
+	    ((__vector unsigned long long)__A) [1]});
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1184,11 +1206,11 @@
 {
   __v4sf rounded;
   __vector signed int temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   rounded = vec_rint(__A);
   temp = vec_cts (rounded, 0);
-  result = (__vector __m64) vec_pack (temp, temp);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
@@ -1265,8 +1287,8 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadh_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [1] = __p [1];
 
   return (__m128)__a;
@@ -1276,7 +1298,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeh_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[1];
 }
@@ -1285,14 +1307,16 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movehl_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergel ((__vector __m64)__B, (__vector __m64)__A);
+  return (__m128) vec_mergel ((__vector unsigned long long)__B,
+			      (__vector unsigned long long)__A);
 }
 
 /* Moves the lower two values of B into the upper two values of A.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movelh_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergeh ((__vector __m64)__A, (__vector __m64)__B);
+  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
+			      (__vector unsigned long long)__B);
 }
 
 /* Sets the lower two SPFP values with 64-bits of data loaded from P;
@@ -1300,8 +1324,8 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadl_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [0] = __p [0];
 
   return (__m128)__a;
@@ -1311,7 +1335,7 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storel_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[0];
 }
@@ -1323,7 +1347,7 @@
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_ps (__m128 __A)
 {
-  __vector __m64 result;
+  __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
     {
 #ifdef __LITTLE_ENDIAN__
@@ -1333,8 +1357,9 @@
 #endif
     };
 
-  result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A,
-					 (__vector unsigned char) perm_mask);
+  result = ((__vector unsigned long long)
+	    vec_vbpermq ((__vector unsigned char) __A,
+			 (__vector unsigned char) perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
   return result[1];
@@ -1398,13 +1423,13 @@
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmpgt (a, b);
+  c = (__vector __bool short)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
@@ -1436,13 +1461,13 @@
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmpgt (a, b);
+  c = (__vector __bool char)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1472,13 +1497,13 @@
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmplt (a, b);
+  c = (__vector __bool short)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
@@ -1510,13 +1535,13 @@
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmplt (a, b);
+  c = (__vector __bool char)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1602,7 +1627,7 @@
 #endif
     };
   __m64_union t;
-  __vector __m64 a, p, r;
+  __vector unsigned long long a, p, r;
 
 #ifdef __LITTLE_ENDIAN__
   t.as_short[0] = permute_selectors[element_selector_10];
```
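The `_MM_SHUFFLE` macro this changeset adds packs four 2-bit lane selectors into the 8-bit immediate that the shuffle intrinsics expect, with the first argument landing in the highest bits. A minimal standalone check (illustrative, not part of the patch):

```c
#include <assert.h>

/* Same definition the patch adds to xmmintrin.h.  */
#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))

int main (void)
{
  /* Identity shuffle: lanes 3,2,1,0 -> 0b11100100 == 0xE4.  */
  assert (_MM_SHUFFLE (3, 2, 1, 0) == 0xE4);
  /* Broadcast lane 0 into all four positions -> 0.  */
  assert (_MM_SHUFFLE (0, 0, 0, 0) == 0x00);
  return 0;
}
```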
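The new `__m128_u` typedef is what makes the `_mm_storeu_ps` hunk work: lowering the type's alignment to 1 with `__aligned__ (1)` tells GCC it may not assume the destination is 16-byte aligned, so the dereference is lowered to an unaligned store. A small sketch of the same technique, using hypothetical names `v4sf`/`v4sf_u` rather than the header's reserved identifiers:

```c
/* v4sf mirrors __m128; v4sf_u mirrors the patch's __m128_u.  */
typedef float v4sf   __attribute__ ((__vector_size__ (16), __may_alias__));
typedef float v4sf_u __attribute__ ((__vector_size__ (16), __may_alias__,
                                     __aligned__ (1)));

void
store_unaligned (float *p, v4sf v)
{
  /* Storing through v4sf would let the compiler assume a 16-byte
     aligned address; the 1-byte-aligned type makes any address valid.  */
  *(v4sf_u *) p = v;
}
```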
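Replacing `vec_min`/`vec_max` with an explicit `vec_vcmpgtfp` + `vec_sel` pair in `_mm_min_ps`/`_mm_max_ps` matters for NaN inputs: x86 `MINPS`/`MAXPS` return the second source operand whenever the comparison is false, including the unordered case, while the VMX min/max instructions follow their own NaN rules. The compare-and-select form reproduces the x86 behavior. A scalar model of the selection logic (illustrative only, not from the patch):

```c
#include <math.h>
#include <stdio.h>

/* Mirrors vec_sel (__B, __A, __B > __A): any comparison involving a
   NaN is false, so __B is selected, exactly like x86 MINPS.  */
static float
min_ps_model (float a, float b)
{
  return (b > a) ? a : b;
}

int main (void)
{
  float qnan = nanf ("");
  printf ("%f\n", min_ps_model (qnan, 1.0f)); /* 1.000000: NaN in a selects b */
  printf ("%f\n", min_ps_model (1.0f, qnan)); /* nan: NaN in b passes through */
  return 0;
}
```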