comparison gcc/config/arm/mmintrin.h @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents a06113de4d67
children 84e7813d76e9
comparing 68:561a7518be6b (lines marked -) with 111:04ced10e8804 (lines marked +)
@@ -1 +1 @@
-/* Copyright (C) 2002, 2003, 2004, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
    GCC is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published
@@ -22 +22 @@
    <http://www.gnu.org/licenses/>. */
 
 #ifndef _MMINTRIN_H_INCLUDED
 #define _MMINTRIN_H_INCLUDED
 
+#ifndef __IWMMXT__
+#error mmintrin.h included without enabling WMMX/WMMX2 instructions (e.g. -march=iwmmxt or -march=iwmmxt2)
+#endif
+
+
+#if defined __cplusplus
+extern "C" {
+/* Intrinsics use C name-mangling. */
+#endif /* __cplusplus */
+
 /* The data type intended for user use. */
 typedef unsigned long long __m64, __int64;
 
 /* Internal data types for implementing the intrinsics. */
 typedef int __v2si __attribute__ ((vector_size (8)));
 typedef short __v4hi __attribute__ ((vector_size (8)));
-typedef char __v8qi __attribute__ ((vector_size (8)));
+typedef signed char __v8qi __attribute__ ((vector_size (8)));
+
+/* Provided for source compatibility with MMX. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+}
 
 /* "Convert" __m64 and __int64 into each other. */
 static __inline __m64
 _mm_cvtsi64_m64 (__int64 __i)
 {
   return __i;
 }
 
@@ -52 +68 @@
 }
 
 static __inline __int64
 _mm_cvtsi32_si64 (int __i)
 {
-  return __i;
+  return (__i & 0xffffffff);
 }
 
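The conversion helpers above move a scalar into the low half of an __m64 and back. A minimal usage sketch, assuming an iWMMXt-enabled toolchain and the matching _mm_cvtsi64_si32 from the same header (its definition is outside the hunks shown here):

#include <mmintrin.h>

/* Round-trip a 32-bit value through an __m64.  With the revised
   _mm_cvtsi32_si64, only the low 32 bits of the vector are populated.  */
int
roundtrip_si32 (int x)
{
  __m64 v = _mm_cvtsi32_si64 (x);
  return _mm_cvtsi64_si32 (v);
}
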
 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
    the result, and the four 16-bit values from M2 into the upper four 8-bit
    values of the result, all with signed saturation. */
@@ -601 +617 @@
 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
    64-bit value in M2. */
 static __inline __m64
 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
 {
-  return __builtin_arm_wandn (__m1, __m2);
+  return __builtin_arm_wandn (__m2, __m1);
 }
 
 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
 static __inline __m64
 _mm_or_si64 (__m64 __m1, __m64 __m2)
@@ -933 +949 @@
    values in A and B. Return the value in the lower 16-bit word; the
    upper words are cleared. */
 static __inline __m64
 _mm_sad_pu8 (__m64 __A, __m64 __B)
 {
-  return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B);
+  return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B);
+}
+
+static __inline __m64
+_mm_sada_pu8 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return (__m64) __builtin_arm_wsadb ((__v2si)__A, (__v8qi)__B, (__v8qi)__C);
 }
 
 /* Compute the sum of the absolute differences of the unsigned 16-bit
    values in A and B. Return the value in the lower 32-bit word; the
    upper words are cleared. */
 static __inline __m64
 _mm_sad_pu16 (__m64 __A, __m64 __B)
 {
-  return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B);
+  return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B);
 }
+
+static __inline __m64
+_mm_sada_pu16 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return (__m64) __builtin_arm_wsadh ((__v2si)__A, (__v4hi)__B, (__v4hi)__C);
+}
+
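After this change, _mm_sad_pu8 starts the hardware accumulator from zero (wsadbz) while the new _mm_sada_pu8 adds on top of a previous result. A usage sketch, assuming vectors built elsewhere and the _mm_cvtsi64_si32 helper from this header:

#include <mmintrin.h>

/* Sum of absolute differences over two 8-byte blocks, with the second
   block accumulated on top of the first result.  */
int
sad_two_blocks (__m64 a0, __m64 b0, __m64 a1, __m64 b1)
{
  __m64 acc = _mm_sad_pu8 (a0, b0);        /* accumulator starts at zero */
  acc = _mm_sada_pu8 (acc, a1, b1);        /* add the second block's SAD */
  return _mm_cvtsi64_si32 (acc) & 0xffff;  /* sum lives in the low 16 bits */
}
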
 
 /* Compute the sum of the absolute differences of the unsigned 8-bit
    values in A and B. Return the value in the lower 16-bit word; the
    upper words are cleared. */
 static __inline __m64
@@ -963 +992 @@
 _mm_sadz_pu16 (__m64 __A, __m64 __B)
 {
   return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B);
 }
 
-static __inline __m64
-_mm_align_si64 (__m64 __A, __m64 __B, int __C)
-{
-  return (__m64) __builtin_arm_walign ((__v8qi)__A, (__v8qi)__B, __C);
-}
+#define _mm_align_si64(__A,__B, N) \
+  (__m64) __builtin_arm_walign ((__v8qi) (__A),(__v8qi) (__B), (N))
 
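_mm_align_si64 becomes a macro because the byte shift count feeds the walign builtin, which wants a compile-time immediate. A sketch of the intended use, with a literal count:

#include <mmintrin.h>

/* Extract the 8 bytes that start 3 bytes into the concatenation of A and B.  */
static __m64
extract_window (__m64 a, __m64 b)
{
  return _mm_align_si64 (a, b, 3);  /* the count must be a literal, not a variable */
}
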
 /* Creates a 64-bit zero. */
 static __inline __m64
 _mm_setzero_si64 (void)
 {
@@ -985 +1011 @@
 static __inline void
 _mm_setwcx (const int __value, const int __regno)
 {
   switch (__regno)
     {
-    case 0: __builtin_arm_setwcx (__value, 0); break;
-    case 1: __builtin_arm_setwcx (__value, 1); break;
-    case 2: __builtin_arm_setwcx (__value, 2); break;
-    case 3: __builtin_arm_setwcx (__value, 3); break;
-    case 8: __builtin_arm_setwcx (__value, 8); break;
-    case 9: __builtin_arm_setwcx (__value, 9); break;
-    case 10: __builtin_arm_setwcx (__value, 10); break;
-    case 11: __builtin_arm_setwcx (__value, 11); break;
-    default: break;
+    case 0:
+      __asm __volatile ("tmcr wcid, %0" :: "r"(__value));
+      break;
+    case 1:
+      __asm __volatile ("tmcr wcon, %0" :: "r"(__value));
+      break;
+    case 2:
+      __asm __volatile ("tmcr wcssf, %0" :: "r"(__value));
+      break;
+    case 3:
+      __asm __volatile ("tmcr wcasf, %0" :: "r"(__value));
+      break;
+    case 8:
+      __builtin_arm_setwcgr0 (__value);
+      break;
+    case 9:
+      __builtin_arm_setwcgr1 (__value);
+      break;
+    case 10:
+      __builtin_arm_setwcgr2 (__value);
+      break;
+    case 11:
+      __builtin_arm_setwcgr3 (__value);
+      break;
+    default:
+      break;
     }
 }
 
 static __inline int
 _mm_getwcx (const int __regno)
 {
+  int __value;
   switch (__regno)
     {
-    case 0: return __builtin_arm_getwcx (0);
-    case 1: return __builtin_arm_getwcx (1);
-    case 2: return __builtin_arm_getwcx (2);
-    case 3: return __builtin_arm_getwcx (3);
-    case 8: return __builtin_arm_getwcx (8);
-    case 9: return __builtin_arm_getwcx (9);
-    case 10: return __builtin_arm_getwcx (10);
-    case 11: return __builtin_arm_getwcx (11);
-    default: return 0;
+    case 0:
+      __asm __volatile ("tmrc %0, wcid" : "=r"(__value));
+      break;
+    case 1:
+      __asm __volatile ("tmrc %0, wcon" : "=r"(__value));
+      break;
+    case 2:
+      __asm __volatile ("tmrc %0, wcssf" : "=r"(__value));
+      break;
+    case 3:
+      __asm __volatile ("tmrc %0, wcasf" : "=r"(__value));
+      break;
+    case 8:
+      return __builtin_arm_getwcgr0 ();
+    case 9:
+      return __builtin_arm_getwcgr1 ();
+    case 10:
+      return __builtin_arm_getwcgr2 ();
+    case 11:
+      return __builtin_arm_getwcgr3 ();
+    default:
+      break;
     }
+  return __value;
 }
 
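_mm_setwcx and _mm_getwcx now reach control registers 0-3 through TMCR/TMRC inline assembly and the general-purpose wCGR registers 8-11 through dedicated builtins, but the caller-visible interface is unchanged. A small sketch, assuming the register number is a literal:

#include <mmintrin.h>

/* Stash a count in wCGR0 (register number 8) and read it back.  */
static int
roundtrip_wcgr0 (int count)
{
  _mm_setwcx (count, 8);
  return _mm_getwcx (8);
}
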
 /* Creates a vector of two 32-bit values; I0 is least significant. */
 static __inline __m64
 _mm_set_pi32 (int __i1, int __i0)
 {
-  union {
+  union
+    {
     __m64 __q;
-    struct {
+    struct
+      {
       unsigned int __i0;
       unsigned int __i1;
     } __s;
   } __u;
 
@@ -1036 +1096 @@
 
 /* Creates a vector of four 16-bit values; W0 is least significant. */
 static __inline __m64
 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
 {
-  unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2;
-  unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0;
+  unsigned int __i1 = (unsigned short) __w3 << 16 | (unsigned short) __w2;
+  unsigned int __i0 = (unsigned short) __w1 << 16 | (unsigned short) __w0;
+
   return _mm_set_pi32 (__i1, __i0);
-
 }
 
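_mm_set_pi16 packs its arguments into two 32-bit halves and hands them to _mm_set_pi32, so __w0 always lands in the least significant halfword. A trivial sketch:

#include <mmintrin.h>

/* Lanes from least to most significant: 0, 1, 2, 3.  */
static __m64
make_ramp (void)
{
  return _mm_set_pi16 (3, 2, 1, 0);
}
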
 /* Creates a vector of eight 8-bit values; B0 is least significant. */
 static __inline __m64
 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
@@ -1106 +1166 @@
   unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
   unsigned int __i = __w << 16 | __w;
   return _mm_set1_pi32 (__i);
 }
 
-/* Convert an integer to a __m64 object. */
+#ifdef __IWMMXT2__
 static __inline __m64
-_m_from_int (int __a)
+_mm_abs_pi8 (__m64 m1)
 {
-  return (__m64)__a;
+  return (__m64) __builtin_arm_wabsb ((__v8qi)m1);
 }
 
+static __inline __m64
+_mm_abs_pi16 (__m64 m1)
+{
+  return (__m64) __builtin_arm_wabsh ((__v4hi)m1);
+
+}
+
+static __inline __m64
+_mm_abs_pi32 (__m64 m1)
+{
+  return (__m64) __builtin_arm_wabsw ((__v2si)m1);
+
+}
+
+static __inline __m64
+_mm_addsubhx_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_waddsubhx ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_absdiff_pu8 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wabsdiffb ((__v8qi)a, (__v8qi)b);
+}
+
+static __inline __m64
+_mm_absdiff_pu16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wabsdiffh ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_absdiff_pu32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wabsdiffw ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_addc_pu16 (__m64 a, __m64 b)
+{
+  __m64 result;
+  __asm__ __volatile__ ("waddhc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b));
+  return result;
+}
+
+static __inline __m64
+_mm_addc_pu32 (__m64 a, __m64 b)
+{
+  __m64 result;
+  __asm__ __volatile__ ("waddwc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b));
+  return result;
+}
+
+static __inline __m64
+_mm_avg4_pu8 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wavg4 ((__v8qi)a, (__v8qi)b);
+}
+
+static __inline __m64
+_mm_avg4r_pu8 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wavg4r ((__v8qi)a, (__v8qi)b);
+}
+
+static __inline __m64
+_mm_maddx_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmaddsx ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_maddx_pu16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmaddux ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_msub_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmaddsn ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_msub_pu16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmaddun ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_mulhi_pi32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulwsm ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_mulhi_pu32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulwum ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_mulhir_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulsmr ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_mulhir_pi32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulwsmr ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_mulhir_pu16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulumr ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_mulhir_pu32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulwumr ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_mullo_pi32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wmulwl ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_qmulm_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wqmulm ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_qmulm_pi32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wqmulwm ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_qmulmr_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wqmulmr ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_qmulmr_pi32 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wqmulwmr ((__v2si)a, (__v2si)b);
+}
+
+static __inline __m64
+_mm_subaddhx_pi16 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_wsubaddhx ((__v4hi)a, (__v4hi)b);
+}
+
+static __inline __m64
+_mm_addbhusl_pu8 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_waddbhusl ((__v4hi)a, (__v8qi)b);
+}
+
+static __inline __m64
+_mm_addbhusm_pu8 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_waddbhusm ((__v4hi)a, (__v8qi)b);
+}
+
+#define _mm_qmiabb_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiabb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiabbn_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiabbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiabt_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiabt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiabtn_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc=acc;\
+   __m64 _m1=m1;\
+   __m64 _m2=m2;\
+   _acc = (__m64) __builtin_arm_wqmiabtn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiatb_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiatb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiatbn_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiatbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiatt_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiatt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_qmiattn_pi32(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wqmiattn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
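The _mm_qmia*_pi32 macros above wrap the WMMX2 saturating multiply-accumulate builtins; GCC statement expressions are used so each argument is evaluated once and the updated accumulator is returned. A hedged sketch of the "bb" (bottom-by-bottom) form, assuming __IWMMXT2__ is defined:

#include <mmintrin.h>

/* Multiply the bottom 16-bit halves of each 32-bit lane of m1 and m2 and
   accumulate into acc with saturation.  */
static __m64
acc_bottom_bottom (__m64 acc, __m64 m1, __m64 m2)
{
  return _mm_qmiabb_pi32 (acc, m1, m2);
}
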
+#define _mm_wmiabb_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiabb (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiabbn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiabbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiabt_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiabt (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiabtn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiabtn (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiatb_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiatb (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiatbn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiatbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiatt_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiatt (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiattn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiattn (_acc, (__v4hi)_m1, (__v4hi)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawbb_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawbb (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawbbn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawbbn (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawbt_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawbt (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawbtn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawbtn (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawtb_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawtb (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawtbn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawtbn (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawtt_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawtt (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+#define _mm_wmiawttn_si64(acc, m1, m2) \
+  ({\
+   __m64 _acc = acc;\
+   __m64 _m1 = m1;\
+   __m64 _m2 = m2;\
+   _acc = (__m64) __builtin_arm_wmiawttn (_acc, (__v2si)_m1, (__v2si)_m2);\
+   _acc;\
+   })
+
+/* The third arguments should be an immediate. */
+#define _mm_merge_si64(a, b, n) \
+  ({\
+   __m64 result;\
+   result = (__m64) __builtin_arm_wmerge ((__m64) (a), (__m64) (b), (n));\
+   result;\
+   })
+#endif /* __IWMMXT2__ */
+
+static __inline __m64
+_mm_alignr0_si64 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_walignr0 ((__v8qi) a, (__v8qi) b);
+}
+
+static __inline __m64
+_mm_alignr1_si64 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_walignr1 ((__v8qi) a, (__v8qi) b);
+}
+
+static __inline __m64
+_mm_alignr2_si64 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_walignr2 ((__v8qi) a, (__v8qi) b);
+}
+
+static __inline __m64
+_mm_alignr3_si64 (__m64 a, __m64 b)
+{
+  return (__m64) __builtin_arm_walignr3 ((__v8qi) a, (__v8qi) b);
+}
+
+static __inline void
+_mm_tandcb ()
+{
+  __asm __volatile ("tandcb r15");
+}
+
+static __inline void
+_mm_tandch ()
+{
+  __asm __volatile ("tandch r15");
+}
+
+static __inline void
+_mm_tandcw ()
+{
+  __asm __volatile ("tandcw r15");
+}
+
+#define _mm_textrcb(n) \
+  ({\
+   __asm__ __volatile__ (\
+     "textrcb r15, %0" : : "i" (n));\
+   })
+
+#define _mm_textrch(n) \
+  ({\
+   __asm__ __volatile__ (\
+     "textrch r15, %0" : : "i" (n));\
+   })
+
+#define _mm_textrcw(n) \
+  ({\
+   __asm__ __volatile__ (\
+     "textrcw r15, %0" : : "i" (n));\
+   })
+
+static __inline void
+_mm_torcb ()
+{
+  __asm __volatile ("torcb r15");
+}
+
+static __inline void
+_mm_torch ()
+{
+  __asm __volatile ("torch r15");
+}
+
+static __inline void
+_mm_torcw ()
+{
+  __asm __volatile ("torcw r15");
+}
+
+#ifdef __IWMMXT2__
+static __inline void
+_mm_torvscb ()
+{
+  __asm __volatile ("torvscb r15");
+}
+
+static __inline void
+_mm_torvsch ()
+{
+  __asm __volatile ("torvsch r15");
+}
+
+static __inline void
+_mm_torvscw ()
+{
+  __asm __volatile ("torvscw r15");
+}
+#endif /* __IWMMXT2__ */
+
+static __inline __m64
+_mm_tbcst_pi8 (int value)
+{
+  return (__m64) __builtin_arm_tbcstb ((signed char) value);
+}
+
+static __inline __m64
+_mm_tbcst_pi16 (int value)
+{
+  return (__m64) __builtin_arm_tbcsth ((short) value);
+}
+
+static __inline __m64
+_mm_tbcst_pi32 (int value)
+{
+  return (__m64) __builtin_arm_tbcstw (value);
+}
+
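The _mm_tbcst_* intrinsics above broadcast a scalar from an ARM core register into every lane of a wireless-MMX register. A minimal sketch:

#include <mmintrin.h>

/* Replicate a 16-bit value into all four halfword lanes.  */
static __m64
splat_halfwords (int value)
{
  return _mm_tbcst_pi16 (value);
}
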
+#define _m_empty _mm_empty
 #define _m_packsswb _mm_packs_pi16
 #define _m_packssdw _mm_packs_pi32
 #define _m_packuswb _mm_packs_pu16
 #define _m_packusdw _mm_packs_pu32
 #define _m_packssqd _mm_packs_pi64
@@ -1248 +1824 @@
 #define _m_psadzbw _mm_sadz_pu8
 #define _m_psadzwd _mm_sadz_pu16
 #define _m_paligniq _mm_align_si64
 #define _m_cvt_si2pi _mm_cvtsi64_m64
 #define _m_cvt_pi2si _mm_cvtm64_si64
+#define _m_from_int _mm_cvtsi32_si64
+#define _m_to_int _mm_cvtsi64_si32
+
+#if defined __cplusplus
+}; /* End "C" */
+#endif /* __cplusplus */
 
 #endif /* _MMINTRIN_H_INCLUDED */