CbC_gcc: diff gcc/config/i386/xmmintrin.h @ 111:04ced10e8804
gcc 7
author:   kono
date:     Fri, 27 Oct 2017 22:46:09 +0900
parents:  f6334be47118
children: 84e7813d76e9
--- a/gcc/config/i386/xmmintrin.h	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/config/i386/xmmintrin.h	Fri Oct 27 22:46:09 2017 +0900
@@ -1,5 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -28,20 +27,50 @@
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
-#ifndef __SSE__
-# error "SSE instruction set not enabled"
-#else
-
 /* We need type definitions from the MMX header file.  */
 #include <mmintrin.h>
 
 /* Get _mm_malloc () and _mm_free ().  */
 #include <mm_malloc.h>
 
+/* Constants for use with _mm_prefetch.  */
+enum _mm_hint
+{
+  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
+  _MM_HINT_ET0 = 7,
+  _MM_HINT_ET1 = 6,
+  _MM_HINT_T0 = 3,
+  _MM_HINT_T1 = 2,
+  _MM_HINT_T2 = 1,
+  _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation.  */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+}
+#else
+#define _mm_prefetch(P, I) \
+  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
+#endif
+
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __DISABLE_SSE__
+#endif /* __SSE__ */
+
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components.  */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
 
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
@@ -49,15 +78,6 @@
 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
 	(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
 
-/* Constants for use with _mm_prefetch.  */
-enum _mm_hint
-{
-  _MM_HINT_T0 = 3,
-  _MM_HINT_T1 = 2,
-  _MM_HINT_T2 = 1,
-  _MM_HINT_NTA = 0
-};
-
 /* Bits in the MXCSR.  */
 #define _MM_EXCEPT_MASK       0x003f
 #define _MM_EXCEPT_INVALID    0x0001
@@ -85,6 +105,14 @@
 #define _MM_FLUSH_ZERO_ON     0x8000
 #define _MM_FLUSH_ZERO_OFF    0x0000
 
+/* Create an undefined vector.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+  __m128 __Y = __Y;
+  return __Y;
+}
+
 /* Create a vector of zeros.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_setzero_ps (void)
@@ -155,25 +183,25 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A + (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A - (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A * (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_div_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) ((__v4sf)__A / (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -896,14 +924,14 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_load_ps (float const *__P)
 {
-  return (__m128) *(__v4sf *)__P;
+  return *(__m128 *)__P;
 }
 
 /* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadu_ps (float const *__P)
 {
-  return (__m128) __builtin_ia32_loadups (__P);
+  return *(__m128_u *)__P;
 }
 
 /* Load four SPFP values in reverse order.  The address must be aligned.  */
@@ -932,27 +960,27 @@
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_ss (float *__P, __m128 __A)
 {
-  *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+  *__P = ((__v4sf)__A)[0];
 }
 
 extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_f32 (__m128 __A)
 {
-  return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+  return ((__v4sf)__A)[0];
 }
 
 /* Store four SPFP values.  The address must be 16-byte aligned.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_ps (float *__P, __m128 __A)
 {
-  *(__v4sf *)__P = (__v4sf)__A;
+  *(__m128 *)__P = __A;
 }
 
 /* Store four SPFP values.  The address need not be 16-byte aligned.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  __builtin_ia32_storeups (__P, (__v4sf)__A);
+  *(__m128_u *)__P = __A;
 }
 
 /* Store the lower SPFP value across four words.  */
@@ -1184,19 +1212,6 @@
   return _mm_sad_pu8 (__A, __B);
 }
 
-/* Loads one cache line from address P to a location "closer" to the
-   processor.  The selector I specifies the type of prefetch operation.  */
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
-  __builtin_prefetch (__P, 0, __I);
-}
-#else
-#define _mm_prefetch(P, I) \
-  __builtin_prefetch ((P), 0, (I))
-#endif
-
 /* Stores the data in A to the address P without polluting the caches.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_stream_pi (__m64 *__P, __m64 __A)
@@ -1219,15 +1234,6 @@
   __builtin_ia32_sfence ();
 }
 
-/* The execution of the next instruction is delayed by an implementation
-   specific amount of time.  The instruction does not modify the
-   architectural state.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_pause (void)
-{
-  __asm__ __volatile__ ("rep; nop" : : );
-}
-
 /* Transpose the 4x4 matrix composed of row[0-3].  */
 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
 do { \
@@ -1243,9 +1249,22 @@
 } while (0)
 
 /* For backward source compatibility.  */
-#ifdef __SSE2__
 # include <emmintrin.h>
-#endif
+
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE__ */
 
-#endif /* __SSE__ */
+/* The execution of the next instruction is delayed by an implementation
+   specific amount of time.  The instruction does not modify the
+   architectural state.  This is after the pop_options pragma because
+   it does not require SSE support in the processor--the encoding is a
+   nop on processors that do not support it.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+  __builtin_ia32_pause ();
+}
+
 #endif /* _XMMINTRIN_H_INCLUDED */
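The most consequential change above is to _mm_prefetch: it moves ahead of the SSE guard (prefetch needs no SSE support), and the hint space gains the write-prefetch values _MM_HINT_ET0/_MM_HINT_ET1, encoded as "_MM_HINT_T with set 3rd bit". Bit 2 of the hint selects a read (0) or write (1) prefetch and bits 0-1 give the temporal locality, which is exactly how the new body splits the value for __builtin_prefetch. Below is a minimal standalone sketch of that decoding; the enum values are copied from the diff, but show_hint is an illustrative helper, not part of the header.

#include <stdio.h>

/* Hint layout per the new header: bit 2 = write ("ET") prefetch,
   bits 0-1 = temporal locality.  */
enum mm_hint { ET0 = 7, ET1 = 6, T0 = 3, T1 = 2, T2 = 1, NTA = 0 };

/* Same arithmetic as the new _mm_prefetch body.  */
static void
show_hint (const char *name, int hint)
{
  printf ("%-3s -> rw=%d, locality=%d\n", name,
          (hint & 0x4) >> 2, hint & 0x3);
}

int
main (void)
{
  show_hint ("ET0", ET0);  /* rw=1, locality=3: prefetch for writing */
  show_hint ("T0",  T0);   /* rw=0, locality=3: classic prefetcht0 */
  show_hint ("NTA", NTA);  /* rw=0, locality=0: non-temporal */

  /* What _mm_prefetch (p, _MM_HINT_ET0) boils down to; the rw and
     locality arguments of __builtin_prefetch must be constants.  */
  static float buf[1024];
  __builtin_prefetch (buf, 1, 3);
  return 0;
}

The hunk removed near the end of the diff shows why the rewrite mattered: the old body passed the hint straight through as the locality argument with rw fixed at 0, so the ET hints could not have been expressed at all.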
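The load/store and arithmetic rewrites all follow one pattern: dedicated ia32 builtins (__builtin_ia32_loadups, __builtin_ia32_storeups, __builtin_ia32_addps, ...) are replaced with plain C on GCC vector types, which the middle end can optimize directly. For the unaligned cases the enabler is the new __m128_u typedef with __aligned__ (1). The following is a self-contained sketch of the idea, using illustrative typedef names rather than the header's reserved ones, and assuming GCC's vector and alignment attributes.

/* GCC vector-extension types; v4sf_u promises only 1-byte alignment.  */
typedef float v4sf   __attribute__ ((__vector_size__ (16), __may_alias__));
typedef float v4sf_u __attribute__ ((__vector_size__ (16), __may_alias__,
                                     __aligned__ (1)));

/* Dereferencing through the 1-byte-aligned type forbids the compiler
   from assuming 16-byte alignment, so it emits an unaligned load
   (movups) rather than the aligned form (movaps) -- the same trick
   _mm_loadu_ps now uses.  */
static v4sf
load_unaligned (const float *p)
{
  return *(const v4sf_u *) p;
}

/* The _mm_storeu_ps counterpart.  */
static void
store_unaligned (float *p, v4sf v)
{
  *(v4sf_u *) p = v;
}

/* The arithmetic hunks are the same idea: plain operators on vector
   types generate addps/subps/mulps/divps without builtins.  */
static v4sf
add4 (v4sf a, v4sf b)
{
  return a + b;
}

Which instructions are actually emitted (movups vs. movaps, packed vs. scalar forms) still depends on the target and optimization flags; the point of the change is that the compiler, not a fixed builtin, makes that choice.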