comparison gcc/config/rs6000/xmmintrin.h @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparing 111:04ced10e8804 (before) with 131:84e7813d76e9 (after)
@@ -1,6 +1,6 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
    GCC is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -49,17 +49,32 @@
 
    Another difference is the format and details of the X86_64 MXSCR vs
    the PowerISA FPSCR / VSCR registers. We recommend applications
    replace direct access to the MXSCR with the more portable <fenv.h>
    Posix APIs. */
-#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
+#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
 #endif
 
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
+/* Define four value permute mask */
+#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
+
 #include <altivec.h>
+
+/* Avoid collisions between altivec.h and strict adherence to C++ and
+   C11 standards. This should eventually be done inside altivec.h itself,
+   but only after testing a full distro build. */
+#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
+    (defined(__STDC_VERSION__) && \
+     __STDC_VERSION__ >= 201112L))
+#undef vector
+#undef pixel
+#undef bool
+#endif
+
 #include <assert.h>
 
 /* We need type definitions from the MMX header file. */
 #include <mmintrin.h>
 
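Side note on the _MM_SHUFFLE macro added above: it only packs four 2-bit lane selectors into an 8-bit immediate, so moving it ahead of the <altivec.h> include changes nothing about its behaviour. A minimal sketch of how it expands (the selector values below are arbitrary illustration, not taken from the header):

#include <stdio.h>

/* Same definition as the one the hunk above moves in front of <altivec.h>.  */
#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))

int
main (void)
{
  /* 3,2,1,0 keeps every lane in place: 0b11100100 == 0xE4.  */
  printf ("identity: 0x%02X\n", _MM_SHUFFLE (3, 2, 1, 0));
  /* 0,1,2,3 reverses the four lanes: 0b00011011 == 0x1B.  */
  printf ("reverse:  0x%02X\n", _MM_SHUFFLE (0, 1, 2, 3));
  return 0;
}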
@@ -67,10 +82,14 @@
 #include <mm_malloc.h>
 
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components. */
 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Unaligned version of the same type. */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
+                                       __aligned__ (1)));
 
 /* Internal data types for implementing the intrinsics. */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 
 /* Create an undefined vector. */
@@ -155,11 +174,11 @@
 
 /* Store four SPFP values. The address need not be 16-byte aligned. */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeu_ps (float *__P, __m128 __A)
 {
-  *(__m128 *)__P = __A;
+  *(__m128_u *)__P = __A;
 }
 
 /* Store four SPFP values in reverse order. The address must be aligned. */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storer_ps (float *__P, __m128 __A)
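The store change above pairs with the new __m128_u typedef: the old cast to __m128 * lets the compiler assume a 16-byte-aligned address, while __m128_u carries __aligned__ (1), so the dereference is emitted as an unaligned store. A minimal usage sketch under that reading (the function name, buffer and offset are illustrative only):

#include <xmmintrin.h>

void
store_at_unaligned_offset (float *dst)
{
  __m128 v = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
  /* dst + 1 is typically only 4-byte aligned; _mm_storeu_ps must not
     assume 16-byte alignment, which the __m128_u cast now expresses.  */
  _mm_storeu_ps (dst + 1, v);
}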
@@ -436,17 +455,19 @@
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
 }
 
 /* Perform logical bit-wise operations on 128-bit values. */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_and_ps (__m128 __A, __m128 __B)
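Replacing vec_min/vec_max with an explicit compare-and-select matters when a NaN is involved: vec_sel (__B, __A, m) picks __A in lanes where the comparison is true and __B otherwise, and any comparison against NaN is false, so the second operand is returned in that case — presumably to match the documented x86 minps/maxps behaviour. A scalar model of the new per-lane semantics (helper names are illustrative, not part of the header):

/* Per-lane model of the implementations above:
   _mm_min_ps: m = (B > A); result = m ? A : B;
   _mm_max_ps: m = (A > B); result = m ? A : B;  */
static inline float
scalar_min (float a, float b)
{
  return (b > a) ? a : b;   /* NaN in either input => b is returned.  */
}

static inline float
scalar_max (float a, float b)
{
  return (a > b) ? a : b;   /* NaN in either input => b is returned.  */
}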
@@ -966,16 +987,16 @@
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtps_pi32 (__m128 __A)
 {
   /* Splat two lower SPFP values to both halves. */
   __v4sf temp, rounded;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves. */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   rounded = vec_rint(temp);
-  result = (__vector __m64) vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1024,15 +1045,15 @@
    integers in packed form. */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttps_pi32 (__m128 __A)
 {
   __v4sf temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   /* Splat two lower SPFP values to both halves. */
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector __m64) vec_cts (temp, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
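The two conversion hunks above make the same __vector __m64 to __vector unsigned long long type change; the functional difference between the intrinsics themselves is only that _mm_cvtps_pi32 rounds via vec_rint before the float-to-int conversion, while _mm_cvttps_pi32 converts directly and therefore truncates toward zero. A scalar sketch of that distinction (helper names are illustrative):

#include <math.h>

/* _mm_cvtps_pi32-style: round first (rintf honours the current rounding
   mode, normally round-to-nearest-even), then convert.  */
static inline int
convert_rounded (float x)
{
  return (int) rintf (x);
}

/* _mm_cvttps_pi32-style: plain C conversion truncates toward zero.  */
static inline int
convert_truncated (float x)
{
  return (int) x;
}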
@@ -1084,12 +1105,13 @@
   __vector float vf1;
 
   vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
-  return ((__m128) (__vector __m64)
-          { ((__vector __m64)vf1) [0], ((__vector __m64)__A) [1]});
+  return ((__m128) (__vector unsigned long long)
+          { ((__vector unsigned long long)vf1) [0],
+            ((__vector unsigned long long)__A) [1]});
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvt_pi2ps (__m128 __A, __m64 __B)
 {
@@ -1182,15 +1204,15 @@
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtps_pi16(__m128 __A)
 {
   __v4sf rounded;
   __vector signed int temp;
-  __vector __m64 result;
+  __vector unsigned long long result;
 
   rounded = vec_rint(__A);
   temp = vec_cts (rounded, 0);
-  result = (__vector __m64) vec_pack (temp, temp);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
 
   return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
 }
 
 /* Convert the four SPFP values in A to four signed 8-bit integers. */
@@ -1263,57 +1285,59 @@
 /* Sets the upper two SPFP values with 64-bits of data loaded from P;
    the lower two values are passed through from A. */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadh_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [1] = __p [1];
 
   return (__m128)__a;
 }
 
 /* Stores the upper two SPFP values of A into P. */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storeh_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[1];
 }
 
 /* Moves the upper two values of B into the lower two values of A. */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movehl_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergel ((__vector __m64)__B, (__vector __m64)__A);
+  return (__m128) vec_mergel ((__vector unsigned long long)__B,
+                              (__vector unsigned long long)__A);
 }
 
 /* Moves the lower two values of B into the upper two values of A. */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movelh_ps (__m128 __A, __m128 __B)
 {
-  return (__m128) vec_mergeh ((__vector __m64)__A, (__vector __m64)__B);
+  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
+                              (__vector unsigned long long)__B);
 }
 
 /* Sets the lower two SPFP values with 64-bits of data loaded from P;
    the upper two values are passed through from A. */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadl_pi (__m128 __A, __m64 const *__P)
 {
-  __vector __m64 __a = (__vector __m64)__A;
-  __vector __m64 __p = vec_splats(*__P);
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
   __a [0] = __p [0];
 
   return (__m128)__a;
 }
 
 /* Stores the lower two SPFP values of A into P. */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_storel_pi (__m64 *__P, __m128 __A)
 {
-  __vector __m64 __a = (__vector __m64) __A;
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
 
   *__P = __a[0];
 }
 
 #ifdef _ARCH_PWR8
@@ -1321,22 +1345,23 @@
 
 /* Creates a 4-bit mask from the most significant bits of the SPFP values. */
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_ps (__m128 __A)
 {
-  __vector __m64 result;
+  __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
     {
 #ifdef __LITTLE_ENDIAN__
       0x00204060, 0x80808080, 0x80808080, 0x80808080
 #elif __BIG_ENDIAN__
       0x80808080, 0x80808080, 0x80808080, 0x00204060
 #endif
     };
 
-  result = (__vector __m64) vec_vbpermq ((__vector unsigned char) __A,
-                                         (__vector unsigned char) perm_mask);
+  result = ((__vector unsigned long long)
+            vec_vbpermq ((__vector unsigned char) __A,
+                         (__vector unsigned char) perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
   return result[1];
 #elif __BIG_ENDIAN__
   return result[0];
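For reference, the vec_vbpermq selector 0x00204060 gathers vector bit offsets 0, 32, 64 and 96, i.e. the sign bit of each 32-bit lane, so the rewritten statement still produces the usual 4-bit sign mask. A portable scalar model of the result, written from the SSE definition of _mm_movemask_ps rather than from this implementation (the function name is illustrative):

#include <stdint.h>
#include <string.h>

/* Bit i of the result is the sign bit of float lane i.  */
static inline int
movemask_ps_reference (const float a[4])
{
  int mask = 0;
  for (int i = 0; i < 4; i++)
    {
      uint32_t bits;
      memcpy (&bits, &a[i], sizeof bits);   /* raw IEEE-754 bits  */
      mask |= (int) (bits >> 31) << i;      /* sign bit -> bit i  */
    }
  return mask;
}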
@@ -1396,17 +1421,17 @@
 
 _mm_max_pi16 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmpgt (a, b);
+  c = (__vector __bool short)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
   m1.as_m64 = __A;
   m2.as_m64 = __B;
@@ -1434,17 +1459,17 @@
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_pu8 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmpgt (a, b);
+  c = (__vector __bool char)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
 
   m1.as_m64 = __A;
@@ -1470,17 +1495,17 @@
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_pi16 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
   __vector signed short a, b, r;
-  __vector bool short c;
+  __vector __bool short c;
 
   a = (__vector signed short)vec_splats (__A);
   b = (__vector signed short)vec_splats (__B);
-  c = (__vector bool short)vec_cmplt (a, b);
+  c = (__vector __bool short)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
 
   m1.as_m64 = __A;
   m2.as_m64 = __B;
@@ -1508,17 +1533,17 @@
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_pu8 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
   __vector unsigned char a, b, r;
-  __vector bool char c;
+  __vector __bool char c;
 
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector bool char)vec_cmplt (a, b);
+  c = (__vector __bool char)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128_t)r, 0));
+  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
 #else
   __m64_union m1, m2, res;
   long i;
 
   m1.as_m64 = __A;
@@ -1600,11 +1625,11 @@
 #elif __BIG_ENDIAN__
       0x0607, 0x0405, 0x0203, 0x0001
 #endif
     };
   __m64_union t;
-  __vector __m64 a, p, r;
+  __vector unsigned long long a, p, r;
 
 #ifdef __LITTLE_ENDIAN__
   t.as_short[0] = permute_selectors[element_selector_10];
   t.as_short[1] = permute_selectors[element_selector_32];
   t.as_short[2] = permute_selectors[element_selector_54];