comparison gcc/config/rs6000/xmmintrin.h @ 131:84e7813d76e9 (gcc-8.2)

| author | mir3636 |
|---|---|
| date | Thu, 25 Oct 2018 07:37:49 +0900 |
| parents | 04ced10e8804 |
| children | 1830386684a0 |
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Copyright (C) 2002-2017 Free Software Foundation, Inc. | 1 /* Copyright (C) 2002-2018 Free Software Foundation, Inc. |
2 | 2 |
3 This file is part of GCC. | 3 This file is part of GCC. |
4 | 4 |
5 GCC is free software; you can redistribute it and/or modify | 5 GCC is free software; you can redistribute it and/or modify |
6 it under the terms of the GNU General Public License as published by | 6 it under the terms of the GNU General Public License as published by |
49 | 49 |
50 Another difference is the format and details of the X86_64 MXSCR vs | 50 Another difference is the format and details of the X86_64 MXSCR vs |
51 the PowerISA FPSCR / VSCR registers. We recommend applications | 51 the PowerISA FPSCR / VSCR registers. We recommend applications |
52 replace direct access to the MXSCR with the more portable <fenv.h> | 52 replace direct access to the MXSCR with the more portable <fenv.h> |
53 Posix APIs. */ | 53 Posix APIs. */ |
54 #warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." | 54 #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." |
55 #endif | 55 #endif |
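
The changeset hardens the port's compatibility notice from a `#warning` into a hard `#error`, so PowerPC code must now explicitly opt in before including this header. A minimal usage sketch, using the macro named in the message above (`-DNO_WARN_X86_INTRINSICS` on the command line has the same effect):

```c
/* Acknowledge the PowerPC implementation of the x86 intrinsics
   headers before including them.  */
#define NO_WARN_X86_INTRINSICS 1
#include <xmmintrin.h>
```
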
56 | 56 |
57 #ifndef _XMMINTRIN_H_INCLUDED | 57 #ifndef _XMMINTRIN_H_INCLUDED |
58 #define _XMMINTRIN_H_INCLUDED | 58 #define _XMMINTRIN_H_INCLUDED |
59 | 59 |
60 /* Define four value permute mask */ | |
61 #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) | |
62 | |
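
The `_MM_SHUFFLE` macro, defined here ahead of the `altivec.h` include, packs four 2-bit element selectors into one byte (`w` in bits 7:6 down to `z` in bits 1:0). A quick worked check:

```c
/* (3<<6)|(2<<4)|(1<<2)|0 = 0xE4 is the identity permute;
   reversing the selectors gives 0x1B.  */
_Static_assert (_MM_SHUFFLE (3, 2, 1, 0) == 0xE4, "identity");
_Static_assert (_MM_SHUFFLE (0, 1, 2, 3) == 0x1B, "reversal");
```
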
60 #include <altivec.h> | 63 #include <altivec.h> |
64 | |
65 /* Avoid collisions between altivec.h and strict adherence to C++ and | |
66 C11 standards. This should eventually be done inside altivec.h itself, | |
67 but only after testing a full distro build. */ | |
68 #if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \ | |
69 (defined(__STDC_VERSION__) && \ | |
70 __STDC_VERSION__ >= 201112L)) | |
71 #undef vector | |
72 #undef pixel | |
73 #undef bool | |
74 #endif | |
75 | |
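
The new guard removes altivec.h's context-sensitive keywords under strict C++ or C11 modes, where they would collide with standard identifiers; the double-underscore spellings remain available. A sketch of the distinction, assuming `-std=c11`:

```c
/* vector bool int v;  -- rejected here: 'vector' and 'bool' are
   #undef'd under __STRICT_ANSI__ to avoid clashing with the
   standard names.  */
__vector __bool int v;   /* always-valid AltiVec alternate spelling */
```
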
61 #include <assert.h> | 76 #include <assert.h> |
62 | 77 |
63 /* We need type definitions from the MMX header file. */ | 78 /* We need type definitions from the MMX header file. */ |
64 #include <mmintrin.h> | 79 #include <mmintrin.h> |
65 | 80 |
67 #include <mm_malloc.h> | 82 #include <mm_malloc.h> |
68 | 83 |
69 /* The Intel API is flexible enough that we must allow aliasing with other | 84 /* The Intel API is flexible enough that we must allow aliasing with other |
70 vector types, and their scalar components. */ | 85 vector types, and their scalar components. */ |
71 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); | 86 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); |
87 | |
88 /* Unaligned version of the same type. */ | |
89 typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, | |
90 __aligned__ (1))); | |
72 | 91 |
73 /* Internal data types for implementing the intrinsics. */ | 92 /* Internal data types for implementing the intrinsics. */ |
74 typedef float __v4sf __attribute__ ((__vector_size__ (16))); | 93 typedef float __v4sf __attribute__ ((__vector_size__ (16))); |
75 | 94 |
76 /* Create an undefined vector. */ | 95 /* Create an undefined vector. */ |
155 | 174 |
156 /* Store four SPFP values. The address need not be 16-byte aligned. */ | 175 /* Store four SPFP values. The address need not be 16-byte aligned. */ |
157 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 176 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
158 _mm_storeu_ps (float *__P, __m128 __A) | 177 _mm_storeu_ps (float *__P, __m128 __A) |
159 { | 178 { |
160 *(__m128 *)__P = __A; | 179 *(__m128_u *)__P = __A; |
161 } | 180 } |
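
`_mm_storeu_ps` now stores through the new `__m128_u` typedef, whose `__aligned__ (1)` attribute tells the compiler not to assume 16-byte alignment, making the plain dereference well-defined for any `float` pointer. A minimal usage sketch:

```c
float buf[5];                   /* no 16-byte alignment guarantee */
__m128 v = _mm_set1_ps (1.0f);
_mm_storeu_ps (buf + 1, v);     /* buf+1 is only 4-byte aligned */
```
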
162 | 181 |
163 /* Store four SPFP values in reverse order. The address must be aligned. */ | 182 /* Store four SPFP values in reverse order. The address must be aligned. */ |
164 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 183 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
165 _mm_storer_ps (float *__P, __m128 __A) | 184 _mm_storer_ps (float *__P, __m128 __A) |
436 } | 455 } |
437 | 456 |
438 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 457 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
439 _mm_min_ps (__m128 __A, __m128 __B) | 458 _mm_min_ps (__m128 __A, __m128 __B) |
440 { | 459 { |
441 return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B)); | 460 __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A); |
461 return vec_sel (__B, __A, m); | |
442 } | 462 } |
443 | 463 |
444 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 464 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
445 _mm_max_ps (__m128 __A, __m128 __B) | 465 _mm_max_ps (__m128 __A, __m128 __B) |
446 { | 466 { |
447 return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B)); | 467 __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B); |
468 return vec_sel (__B, __A, m); | |
448 } | 469 } |
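
`_mm_min_ps` and `_mm_max_ps` are rewritten from `vec_min`/`vec_max` into an explicit compare-and-select so that NaN handling matches the x86 MINPS/MAXPS rule of returning the second operand when the comparison is unordered. A behavior sketch:

```c
__m128 a = _mm_set1_ps (__builtin_nanf (""));
__m128 b = _mm_set1_ps (2.0f);
__m128 r = _mm_min_ps (a, b);   /* vcmpgtfp is false for NaN, so the
                                   select yields b, matching MINPS */
```
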
449 | 470 |
450 /* Perform logical bit-wise operations on 128-bit values. */ | 471 /* Perform logical bit-wise operations on 128-bit values. */ |
451 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 472 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
452 _mm_and_ps (__m128 __A, __m128 __B) | 473 _mm_and_ps (__m128 __A, __m128 __B) |
966 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 987 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
967 _mm_cvtps_pi32 (__m128 __A) | 988 _mm_cvtps_pi32 (__m128 __A) |
968 { | 989 { |
969 /* Splat two lower SPFP values to both halves. */ | 990 /* Splat two lower SPFP values to both halves. */ |
970 __v4sf temp, rounded; | 991 __v4sf temp, rounded; |
971 __vector __m64 result; | 992 __vector unsigned long long result; |
972 | 993 |
973 /* Splat two lower SPFP values to both halves. */ | 994 /* Splat two lower SPFP values to both halves. */ |
974 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); | 995 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); |
975 rounded = vec_rint(temp); | 996 rounded = vec_rint(temp); |
976 result = (__vector __m64) vec_cts (rounded, 0); | 997 result = (__vector unsigned long long) vec_cts (rounded, 0); |
977 | 998 |
978 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 999 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
979 } | 1000 } |
980 | 1001 |
981 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1002 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1024 integers in packed form. */ | 1045 integers in packed form. */ |
1025 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1046 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1026 _mm_cvttps_pi32 (__m128 __A) | 1047 _mm_cvttps_pi32 (__m128 __A) |
1027 { | 1048 { |
1028 __v4sf temp; | 1049 __v4sf temp; |
1029 __vector __m64 result; | 1050 __vector unsigned long long result; |
1030 | 1051 |
1031 /* Splat two lower SPFP values to both halves. */ | 1052 /* Splat two lower SPFP values to both halves. */ |
1032 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); | 1053 temp = (__v4sf) vec_splat ((__vector long long)__A, 0); |
1033 result = (__vector __m64) vec_cts (temp, 0); | 1054 result = (__vector unsigned long long) vec_cts (temp, 0); |
1034 | 1055 |
1035 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 1056 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
1036 } | 1057 } |
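
The two conversion hunks differ only in rounding: `_mm_cvtps_pi32` calls `vec_rint` first (round to nearest, ties to even in the default mode), while `_mm_cvttps_pi32` lets `vec_cts` truncate toward zero. Assuming the default rounding mode:

```c
__m128 v = _mm_setr_ps (1.7f, -1.7f, 0.0f, 0.0f);
__m64 r = _mm_cvtps_pi32 (v);    /* lower two lanes -> { 2, -2 } */
__m64 t = _mm_cvttps_pi32 (v);   /* lower two lanes -> { 1, -1 } */
```
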
1037 | 1058 |
1038 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1059 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1084 __vector float vf1; | 1105 __vector float vf1; |
1085 | 1106 |
1086 vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B); | 1107 vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B); |
1087 vf1 = (__vector float) vec_ctf (vm1, 0); | 1108 vf1 = (__vector float) vec_ctf (vm1, 0); |
1088 | 1109 |
1089 return ((__m128) (__vector __m64) | 1110 return ((__m128) (__vector unsigned long long) |
1090 { ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]}); | 1111 { ((__vector unsigned long long)vf1) [0], |
1112 ((__vector unsigned long long)__A) [1]}); | |
1091 } | 1113 } |
1092 | 1114 |
1093 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1115 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1094 _mm_cvt_pi2ps (__m128 __A, __m64 __B) | 1116 _mm_cvt_pi2ps (__m128 __A, __m64 __B) |
1095 { | 1117 { |
1182 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1204 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1183 _mm_cvtps_pi16(__m128 __A) | 1205 _mm_cvtps_pi16(__m128 __A) |
1184 { | 1206 { |
1185 __v4sf rounded; | 1207 __v4sf rounded; |
1186 __vector signed int temp; | 1208 __vector signed int temp; |
1187 __vector __m64 result; | 1209 __vector unsigned long long result; |
1188 | 1210 |
1189 rounded = vec_rint(__A); | 1211 rounded = vec_rint(__A); |
1190 temp = vec_cts (rounded, 0); | 1212 temp = vec_cts (rounded, 0); |
1191 result = (__vector __m64) vec_pack (temp, temp); | 1213 result = (__vector unsigned long long) vec_pack (temp, temp); |
1192 | 1214 |
1193 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); | 1215 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0)); |
1194 } | 1216 } |
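
`_mm_cvtps_pi16` rounds the four floats, converts them to 32-bit integers, then narrows with `vec_pack` to four signed shorts in the returned `__m64`. For example, assuming the default rounding mode:

```c
__m128 v = _mm_setr_ps (1.5f, -2.5f, 3.0f, -4.0f);
__m64 p = _mm_cvtps_pi16 (v);   /* { 2, -2, 3, -4 }: ties round to even */
```
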
1195 | 1217 |
1196 /* Convert the four SPFP values in A to four signed 8-bit integers. */ | 1218 /* Convert the four SPFP values in A to four signed 8-bit integers. */ |
1263 /* Sets the upper two SPFP values with 64-bits of data loaded from P; | 1285 /* Sets the upper two SPFP values with 64-bits of data loaded from P; |
1264 the lower two values are passed through from A. */ | 1286 the lower two values are passed through from A. */ |
1265 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1287 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1266 _mm_loadh_pi (__m128 __A, __m64 const *__P) | 1288 _mm_loadh_pi (__m128 __A, __m64 const *__P) |
1267 { | 1289 { |
1268 __vector __m64 __a = (__vector __m64)__A; | 1290 __vector unsigned long long __a = (__vector unsigned long long)__A; |
1269 __vector __m64 __p = vec_splats(*__P); | 1291 __vector unsigned long long __p = vec_splats(*__P); |
1270 __a [1] = __p [1]; | 1292 __a [1] = __p [1]; |
1271 | 1293 |
1272 return (__m128)__a; | 1294 return (__m128)__a; |
1273 } | 1295 } |
1274 | 1296 |
1275 /* Stores the upper two SPFP values of A into P. */ | 1297 /* Stores the upper two SPFP values of A into P. */ |
1276 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1298 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1277 _mm_storeh_pi (__m64 *__P, __m128 __A) | 1299 _mm_storeh_pi (__m64 *__P, __m128 __A) |
1278 { | 1300 { |
1279 __vector __m64 __a = (__vector __m64) __A; | 1301 __vector unsigned long long __a = (__vector unsigned long long) __A; |
1280 | 1302 |
1281 *__P = __a[1]; | 1303 *__P = __a[1]; |
1282 } | 1304 } |
1283 | 1305 |
1284 /* Moves the upper two values of B into the lower two values of A. */ | 1306 /* Moves the upper two values of B into the lower two values of A. */ |
1285 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1307 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1286 _mm_movehl_ps (__m128 __A, __m128 __B) | 1308 _mm_movehl_ps (__m128 __A, __m128 __B) |
1287 { | 1309 { |
1288 return (__m128) vec_mergel ((__vector __m64)__B, (__vector __m64)__A); | 1310 return (__m128) vec_mergel ((__vector unsigned long long)__B, |
1311 (__vector unsigned long long)__A); | |
1289 } | 1312 } |
1290 | 1313 |
1291 /* Moves the lower two values of B into the upper two values of A. */ | 1314 /* Moves the lower two values of B into the upper two values of A. */ |
1292 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1315 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1293 _mm_movelh_ps (__m128 __A, __m128 __B) | 1316 _mm_movelh_ps (__m128 __A, __m128 __B) |
1294 { | 1317 { |
1295 return (__m128) vec_mergeh ((__vector __m64)__A, (__vector __m64)__B); | 1318 return (__m128) vec_mergeh ((__vector unsigned long long)__A, |
1319 (__vector unsigned long long)__B); | |
1296 } | 1320 } |
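
With the `__vector __m64` casts replaced by `__vector unsigned long long`, the merges read directly as doubleword operations: `vec_mergel` takes the high doublewords of B then A, `vec_mergeh` the low doublewords of A then B. Writing lanes as indices:

```c
__m128 A = _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f);
__m128 B = _mm_setr_ps (4.0f, 5.0f, 6.0f, 7.0f);
__m128 hl = _mm_movehl_ps (A, B);  /* { 6, 7, 2, 3 } = { B2, B3, A2, A3 } */
__m128 lh = _mm_movelh_ps (A, B);  /* { 0, 1, 4, 5 } = { A0, A1, B0, B1 } */
```
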
1297 | 1321 |
1298 /* Sets the lower two SPFP values with 64-bits of data loaded from P; | 1322 /* Sets the lower two SPFP values with 64-bits of data loaded from P; |
1299 the upper two values are passed through from A. */ | 1323 the upper two values are passed through from A. */ |
1300 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1324 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1301 _mm_loadl_pi (__m128 __A, __m64 const *__P) | 1325 _mm_loadl_pi (__m128 __A, __m64 const *__P) |
1302 { | 1326 { |
1303 __vector __m64 __a = (__vector __m64)__A; | 1327 __vector unsigned long long __a = (__vector unsigned long long)__A; |
1304 __vector __m64 __p = vec_splats(*__P); | 1328 __vector unsigned long long __p = vec_splats(*__P); |
1305 __a [0] = __p [0]; | 1329 __a [0] = __p [0]; |
1306 | 1330 |
1307 return (__m128)__a; | 1331 return (__m128)__a; |
1308 } | 1332 } |
1309 | 1333 |
1310 /* Stores the lower two SPFP values of A into P. */ | 1334 /* Stores the lower two SPFP values of A into P. */ |
1311 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1335 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1312 _mm_storel_pi (__m64 *__P, __m128 __A) | 1336 _mm_storel_pi (__m64 *__P, __m128 __A) |
1313 { | 1337 { |
1314 __vector __m64 __a = (__vector __m64) __A; | 1338 __vector unsigned long long __a = (__vector unsigned long long) __A; |
1315 | 1339 |
1316 *__P = __a[0]; | 1340 *__P = __a[0]; |
1317 } | 1341 } |
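
The four 64-bit load/store halves above follow one pattern: view the `__m128` as two doublewords and replace or extract index 1 (high) or index 0 (low). A round-trip sketch:

```c
__m64 hi, lo;
__m128 v = _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f);
_mm_storeh_pi (&hi, v);       /* hi = { 2.0f, 3.0f } */
_mm_storel_pi (&lo, v);       /* lo = { 0.0f, 1.0f } */
v = _mm_loadh_pi (v, &hi);    /* upper lanes rewritten in place */
v = _mm_loadl_pi (v, &lo);    /* lower lanes likewise */
```
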
1318 | 1342 |
1319 #ifdef _ARCH_PWR8 | 1343 #ifdef _ARCH_PWR8 |
1321 | 1345 |
1322 /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ | 1346 /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ |
1323 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1347 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1324 _mm_movemask_ps (__m128 __A) | 1348 _mm_movemask_ps (__m128 __A) |
1325 { | 1349 { |
1326 __vector __m64 result; | 1350 __vector unsigned long long result; |
1327 static const __vector unsigned int perm_mask = | 1351 static const __vector unsigned int perm_mask = |
1328 { | 1352 { |
1329 #ifdef __LITTLE_ENDIAN__ | 1353 #ifdef __LITTLE_ENDIAN__ |
1330 0x00204060, 0x80808080, 0x80808080, 0x80808080 | 1354 0x00204060, 0x80808080, 0x80808080, 0x80808080 |
1331 #elif __BIG_ENDIAN__ | 1355 #elif __BIG_ENDIAN__ |
1332 0x80808080, 0x80808080, 0x80808080, 0x00204060 | 1356 0x80808080, 0x80808080, 0x80808080, 0x00204060 |
1333 #endif | 1357 #endif |
1334 }; | 1358 }; |
1335 | 1359 |
1336 result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A, | 1360 result = ((__vector unsigned long long) |
1337 (__vector unsigned char) perm_mask); | 1361 vec_vbpermq ((__vector unsigned char) __A, |
1362 (__vector unsigned char) perm_mask)); | |
1338 | 1363 |
1339 #ifdef __LITTLE_ENDIAN__ | 1364 #ifdef __LITTLE_ENDIAN__ |
1340 return result[1]; | 1365 return result[1]; |
1341 #elif __BIG_ENDIAN__ | 1366 #elif __BIG_ENDIAN__ |
1342 return result[0]; | 1367 return result[0]; |
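
`_mm_movemask_ps` gathers the four sign bits into bits 3:0 of an `int`. The `vec_vbpermq` control constant `0x00204060` selects bit indices 0x00, 0x20, 0x40, 0x60 (0, 32, 64, 96), i.e. the sign bit of each 32-bit lane. For example:

```c
__m128 v = _mm_setr_ps (-1.0f, 2.0f, -3.0f, 4.0f);
int m = _mm_movemask_ps (v);   /* lanes 0 and 2 negative -> 0b0101 = 5 */
```
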
1396 | 1421 |
1397 _mm_max_pi16 (__m64 __A, __m64 __B) | 1422 _mm_max_pi16 (__m64 __A, __m64 __B) |
1398 { | 1423 { |
1399 #if _ARCH_PWR8 | 1424 #if _ARCH_PWR8 |
1400 __vector signed short a, b, r; | 1425 __vector signed short a, b, r; |
1401 __vector bool short c; | 1426 __vector __bool short c; |
1402 | 1427 |
1403 a = (__vector signed short)vec_splats (__A); | 1428 a = (__vector signed short)vec_splats (__A); |
1404 b = (__vector signed short)vec_splats (__B); | 1429 b = (__vector signed short)vec_splats (__B); |
1405 c = (__vector bool short)vec_cmpgt (a, b); | 1430 c = (__vector __bool short)vec_cmpgt (a, b); |
1406 r = vec_sel (b, a, c); | 1431 r = vec_sel (b, a, c); |
1407 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1432 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1408 #else | 1433 #else |
1409 __m64_union m1, m2, res; | 1434 __m64_union m1, m2, res; |
1410 | 1435 |
1411 m1.as_m64 = __A; | 1436 m1.as_m64 = __A; |
1412 m2.as_m64 = __B; | 1437 m2.as_m64 = __B; |
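
This hunk and the three that follow only respell types: `__vector bool` becomes `__vector __bool` and `__int128_t` becomes `__int128`; the splat, compare, select, unpack logic is unchanged. Per-lane behavior, for reference:

```c
__m64 a = _mm_set_pi16 (1, -2, 3, -4);
__m64 b = _mm_set_pi16 (0, 5, -6, 7);
__m64 r = _mm_max_pi16 (a, b);   /* { 1, 5, 3, 7 }: signed 16-bit max */
```
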
1434 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1459 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1435 _mm_max_pu8 (__m64 __A, __m64 __B) | 1460 _mm_max_pu8 (__m64 __A, __m64 __B) |
1436 { | 1461 { |
1437 #if _ARCH_PWR8 | 1462 #if _ARCH_PWR8 |
1438 __vector unsigned char a, b, r; | 1463 __vector unsigned char a, b, r; |
1439 __vector bool char c; | 1464 __vector __bool char c; |
1440 | 1465 |
1441 a = (__vector unsigned char)vec_splats (__A); | 1466 a = (__vector unsigned char)vec_splats (__A); |
1442 b = (__vector unsigned char)vec_splats (__B); | 1467 b = (__vector unsigned char)vec_splats (__B); |
1443 c = (__vector bool char)vec_cmpgt (a, b); | 1468 c = (__vector __bool char)vec_cmpgt (a, b); |
1444 r = vec_sel (b, a, c); | 1469 r = vec_sel (b, a, c); |
1445 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1470 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1446 #else | 1471 #else |
1447 __m64_union m1, m2, res; | 1472 __m64_union m1, m2, res; |
1448 long i; | 1473 long i; |
1449 | 1474 |
1450 m1.as_m64 = __A; | 1475 m1.as_m64 = __A; |
1470 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1495 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1471 _mm_min_pi16 (__m64 __A, __m64 __B) | 1496 _mm_min_pi16 (__m64 __A, __m64 __B) |
1472 { | 1497 { |
1473 #if _ARCH_PWR8 | 1498 #if _ARCH_PWR8 |
1474 __vector signed short a, b, r; | 1499 __vector signed short a, b, r; |
1475 __vector bool short c; | 1500 __vector __bool short c; |
1476 | 1501 |
1477 a = (__vector signed short)vec_splats (__A); | 1502 a = (__vector signed short)vec_splats (__A); |
1478 b = (__vector signed short)vec_splats (__B); | 1503 b = (__vector signed short)vec_splats (__B); |
1479 c = (__vector bool short)vec_cmplt (a, b); | 1504 c = (__vector __bool short)vec_cmplt (a, b); |
1480 r = vec_sel (b, a, c); | 1505 r = vec_sel (b, a, c); |
1481 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1506 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1482 #else | 1507 #else |
1483 __m64_union m1, m2, res; | 1508 __m64_union m1, m2, res; |
1484 | 1509 |
1485 m1.as_m64 = __A; | 1510 m1.as_m64 = __A; |
1486 m2.as_m64 = __B; | 1511 m2.as_m64 = __B; |
1508 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1533 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1509 _mm_min_pu8 (__m64 __A, __m64 __B) | 1534 _mm_min_pu8 (__m64 __A, __m64 __B) |
1510 { | 1535 { |
1511 #if _ARCH_PWR8 | 1536 #if _ARCH_PWR8 |
1512 __vector unsigned char a, b, r; | 1537 __vector unsigned char a, b, r; |
1513 __vector bool char c; | 1538 __vector __bool char c; |
1514 | 1539 |
1515 a = (__vector unsigned char)vec_splats (__A); | 1540 a = (__vector unsigned char)vec_splats (__A); |
1516 b = (__vector unsigned char)vec_splats (__B); | 1541 b = (__vector unsigned char)vec_splats (__B); |
1517 c = (__vector bool char)vec_cmplt (a, b); | 1542 c = (__vector __bool char)vec_cmplt (a, b); |
1518 r = vec_sel (b, a, c); | 1543 r = vec_sel (b, a, c); |
1519 return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0)); | 1544 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); |
1520 #else | 1545 #else |
1521 __m64_union m1, m2, res; | 1546 __m64_union m1, m2, res; |
1522 long i; | 1547 long i; |
1523 | 1548 |
1524 m1.as_m64 = __A; | 1549 m1.as_m64 = __A; |
1600 #elif __BIG_ENDIAN__ | 1625 #elif __BIG_ENDIAN__ |
1601 0x0607, 0x0405, 0x0203, 0x0001 | 1626 0x0607, 0x0405, 0x0203, 0x0001 |
1602 #endif | 1627 #endif |
1603 }; | 1628 }; |
1604 __m64_union t; | 1629 __m64_union t; |
1605 __vector __m64 a, p, r; | 1630 __vector unsigned long long a, p, r; |
1606 | 1631 |
1607 #ifdef __LITTLE_ENDIAN__ | 1632 #ifdef __LITTLE_ENDIAN__ |
1608 t.as_short[0] = permute_selectors[element_selector_10]; | 1633 t.as_short[0] = permute_selectors[element_selector_10]; |
1609 t.as_short[1] = permute_selectors[element_selector_32]; | 1634 t.as_short[1] = permute_selectors[element_selector_32]; |
1610 t.as_short[2] = permute_selectors[element_selector_54]; | 1635 t.as_short[2] = permute_selectors[element_selector_54]; |