Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/mmintrin.h @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | a06113de4d67 |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Copyright (C) 2002, 2003, 2004, 2009 Free Software Foundation, Inc. | 1 /* Copyright (C) 2002-2017 Free Software Foundation, Inc. |
2 | 2 |
3 This file is part of GCC. | 3 This file is part of GCC. |
4 | 4 |
5 GCC is free software; you can redistribute it and/or modify it | 5 GCC is free software; you can redistribute it and/or modify it |
6 under the terms of the GNU General Public License as published | 6 under the terms of the GNU General Public License as published |
22 <http://www.gnu.org/licenses/>. */ | 22 <http://www.gnu.org/licenses/>. */ |
23 | 23 |
24 #ifndef _MMINTRIN_H_INCLUDED | 24 #ifndef _MMINTRIN_H_INCLUDED |
25 #define _MMINTRIN_H_INCLUDED | 25 #define _MMINTRIN_H_INCLUDED |
26 | 26 |
27 #ifndef __IWMMXT__ | |
28 #error mmintrin.h included without enabling WMMX/WMMX2 instructions (e.g. -march=iwmmxt or -march=iwmmxt2) | |
29 #endif | |
30 | |
31 | |
32 #if defined __cplusplus | |
33 extern "C" { | |
34 /* Intrinsics use C name-mangling. */ | |
35 #endif /* __cplusplus */ | |
36 | |
27 /* The data type intended for user use. */ | 37 /* The data type intended for user use. */ |
28 typedef unsigned long long __m64, __int64; | 38 typedef unsigned long long __m64, __int64; |
29 | 39 |
30 /* Internal data types for implementing the intrinsics. */ | 40 /* Internal data types for implementing the intrinsics. */ |
31 typedef int __v2si __attribute__ ((vector_size (8))); | 41 typedef int __v2si __attribute__ ((vector_size (8))); |
32 typedef short __v4hi __attribute__ ((vector_size (8))); | 42 typedef short __v4hi __attribute__ ((vector_size (8))); |
33 typedef char __v8qi __attribute__ ((vector_size (8))); | 43 typedef signed char __v8qi __attribute__ ((vector_size (8))); |
44 | |
45 /* Provided for source compatibility with MMX. */ | |
46 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
47 _mm_empty (void) | |
48 { | |
49 } | |
34 | 50 |
35 /* "Convert" __m64 and __int64 into each other. */ | 51 /* "Convert" __m64 and __int64 into each other. */ |
36 static __inline __m64 | 52 static __inline __m64 |
37 _mm_cvtsi64_m64 (__int64 __i) | 53 _mm_cvtsi64_m64 (__int64 __i) |
38 { | 54 { |
39 return __i; | 55 return __i; |
40 } | 56 } |
41 | 57 |
52 } | 68 } |
53 | 69 |
54 static __inline __int64 | 70 static __inline __int64 |
55 _mm_cvtsi32_si64 (int __i) | 71 _mm_cvtsi32_si64 (int __i) |
56 { | 72 { |
57 return __i; | 73 return (__i & 0xffffffff); |
58 } | 74 } |
59 | 75 |
60 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of | 76 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of |
61 the result, and the four 16-bit values from M2 into the upper four 8-bit | 77 the result, and the four 16-bit values from M2 into the upper four 8-bit |
62 values of the result, all with signed saturation. */ | 78 values of the result, all with signed saturation. */ |
601 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the | 617 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the |
602 64-bit value in M2. */ | 618 64-bit value in M2. */ |
603 static __inline __m64 | 619 static __inline __m64 |
604 _mm_andnot_si64 (__m64 __m1, __m64 __m2) | 620 _mm_andnot_si64 (__m64 __m1, __m64 __m2) |
605 { | 621 { |
606 return __builtin_arm_wandn (__m1, __m2); | 622 return __builtin_arm_wandn (__m2, __m1); |
607 } | 623 } |
608 | 624 |
609 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ | 625 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ |
610 static __inline __m64 | 626 static __inline __m64 |
611 _mm_or_si64 (__m64 __m1, __m64 __m2) | 627 _mm_or_si64 (__m64 __m1, __m64 __m2) |
933 values in A and B. Return the value in the lower 16-bit word; the | 949 values in A and B. Return the value in the lower 16-bit word; the |
934 upper words are cleared. */ | 950 upper words are cleared. */ |
935 static __inline __m64 | 951 static __inline __m64 |
936 _mm_sad_pu8 (__m64 __A, __m64 __B) | 952 _mm_sad_pu8 (__m64 __A, __m64 __B) |
937 { | 953 { |
938 return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B); | 954 return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); |
955 } | |
956 | |
957 static __inline __m64 | |
958 _mm_sada_pu8 (__m64 __A, __m64 __B, __m64 __C) | |
959 { | |
960 return (__m64) __builtin_arm_wsadb ((__v2si)__A, (__v8qi)__B, (__v8qi)__C); | |
939 } | 961 } |
940 | 962 |
941 /* Compute the sum of the absolute differences of the unsigned 16-bit | 963 /* Compute the sum of the absolute differences of the unsigned 16-bit |
942 values in A and B. Return the value in the lower 32-bit word; the | 964 values in A and B. Return the value in the lower 32-bit word; the |
943 upper words are cleared. */ | 965 upper words are cleared. */ |
944 static __inline __m64 | 966 static __inline __m64 |
945 _mm_sad_pu16 (__m64 __A, __m64 __B) | 967 _mm_sad_pu16 (__m64 __A, __m64 __B) |
946 { | 968 { |
947 return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B); | 969 return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); |
948 } | 970 } |
971 | |
972 static __inline __m64 | |
973 _mm_sada_pu16 (__m64 __A, __m64 __B, __m64 __C) | |
974 { | |
975 return (__m64) __builtin_arm_wsadh ((__v2si)__A, (__v4hi)__B, (__v4hi)__C); | |
976 } | |
977 | |
949 | 978 |
950 /* Compute the sum of the absolute differences of the unsigned 8-bit | 979 /* Compute the sum of the absolute differences of the unsigned 8-bit |
951 values in A and B. Return the value in the lower 16-bit word; the | 980 values in A and B. Return the value in the lower 16-bit word; the |
952 upper words are cleared. */ | 981 upper words are cleared. */ |
953 static __inline __m64 | 982 static __inline __m64 |
963 _mm_sadz_pu16 (__m64 __A, __m64 __B) | 992 _mm_sadz_pu16 (__m64 __A, __m64 __B) |
964 { | 993 { |
965 return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); | 994 return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); |
966 } | 995 } |
967 | 996 |
968 static __inline __m64 | 997 #define _mm_align_si64(__A,__B, N) \ |
969 _mm_align_si64 (__m64 __A, __m64 __B, int __C) | 998 (__m64) __builtin_arm_walign ((__v8qi) (__A),(__v8qi) (__B), (N)) |
970 { | |
971 return (__m64) __builtin_arm_walign ((__v8qi)__A, (__v8qi)__B, __C); | |
972 } | |
973 | 999 |
974 /* Creates a 64-bit zero. */ | 1000 /* Creates a 64-bit zero. */ |
975 static __inline __m64 | 1001 static __inline __m64 |
976 _mm_setzero_si64 (void) | 1002 _mm_setzero_si64 (void) |
977 { | 1003 { |
985 static __inline void | 1011 static __inline void |
986 _mm_setwcx (const int __value, const int __regno) | 1012 _mm_setwcx (const int __value, const int __regno) |
987 { | 1013 { |
988 switch (__regno) | 1014 switch (__regno) |
989 { | 1015 { |
990 case 0: __builtin_arm_setwcx (__value, 0); break; | 1016 case 0: |
991 case 1: __builtin_arm_setwcx (__value, 1); break; | 1017 __asm __volatile ("tmcr wcid, %0" :: "r"(__value)); |
992 case 2: __builtin_arm_setwcx (__value, 2); break; | 1018 break; |
993 case 3: __builtin_arm_setwcx (__value, 3); break; | 1019 case 1: |
994 case 8: __builtin_arm_setwcx (__value, 8); break; | 1020 __asm __volatile ("tmcr wcon, %0" :: "r"(__value)); |
995 case 9: __builtin_arm_setwcx (__value, 9); break; | 1021 break; |
996 case 10: __builtin_arm_setwcx (__value, 10); break; | 1022 case 2: |
997 case 11: __builtin_arm_setwcx (__value, 11); break; | 1023 __asm __volatile ("tmcr wcssf, %0" :: "r"(__value)); |
998 default: break; | 1024 break; |
1025 case 3: | |
1026 __asm __volatile ("tmcr wcasf, %0" :: "r"(__value)); | |
1027 break; | |
1028 case 8: | |
1029 __builtin_arm_setwcgr0 (__value); | |
1030 break; | |
1031 case 9: | |
1032 __builtin_arm_setwcgr1 (__value); | |
1033 break; | |
1034 case 10: | |
1035 __builtin_arm_setwcgr2 (__value); | |
1036 break; | |
1037 case 11: | |
1038 __builtin_arm_setwcgr3 (__value); | |
1039 break; | |
1040 default: | |
1041 break; | |
999 } | 1042 } |
1000 } | 1043 } |
1001 | 1044 |
1002 static __inline int | 1045 static __inline int |
1003 _mm_getwcx (const int __regno) | 1046 _mm_getwcx (const int __regno) |
1004 { | 1047 { |
1048 int __value; | |
1005 switch (__regno) | 1049 switch (__regno) |
1006 { | 1050 { |
1007 case 0: return __builtin_arm_getwcx (0); | 1051 case 0: |
1008 case 1: return __builtin_arm_getwcx (1); | 1052 __asm __volatile ("tmrc %0, wcid" : "=r"(__value)); |
1009 case 2: return __builtin_arm_getwcx (2); | 1053 break; |
1010 case 3: return __builtin_arm_getwcx (3); | 1054 case 1: |
1011 case 8: return __builtin_arm_getwcx (8); | 1055 __asm __volatile ("tmrc %0, wcon" : "=r"(__value)); |
1012 case 9: return __builtin_arm_getwcx (9); | 1056 break; |
1013 case 10: return __builtin_arm_getwcx (10); | 1057 case 2: |
1014 case 11: return __builtin_arm_getwcx (11); | 1058 __asm __volatile ("tmrc %0, wcssf" : "=r"(__value)); |
1015 default: return 0; | 1059 break; |
1060 case 3: | |
1061 __asm __volatile ("tmrc %0, wcasf" : "=r"(__value)); | |
1062 break; | |
1063 case 8: | |
1064 return __builtin_arm_getwcgr0 (); | |
1065 case 9: | |
1066 return __builtin_arm_getwcgr1 (); | |
1067 case 10: | |
1068 return __builtin_arm_getwcgr2 (); | |
1069 case 11: | |
1070 return __builtin_arm_getwcgr3 (); | |
1071 default: | |
1072 break; | |
1016 } | 1073 } |
1074 return __value; | |
1017 } | 1075 } |
1018 | 1076 |
1019 /* Creates a vector of two 32-bit values; I0 is least significant. */ | 1077 /* Creates a vector of two 32-bit values; I0 is least significant. */ |
1020 static __inline __m64 | 1078 static __inline __m64 |
1021 _mm_set_pi32 (int __i1, int __i0) | 1079 _mm_set_pi32 (int __i1, int __i0) |
1022 { | 1080 { |
1023 union { | 1081 union |
1082 { | |
1024 __m64 __q; | 1083 __m64 __q; |
1025 struct { | 1084 struct |
1085 { | |
1026 unsigned int __i0; | 1086 unsigned int __i0; |
1027 unsigned int __i1; | 1087 unsigned int __i1; |
1028 } __s; | 1088 } __s; |
1029 } __u; | 1089 } __u; |
1030 | 1090 |
1036 | 1096 |
1037 /* Creates a vector of four 16-bit values; W0 is least significant. */ | 1097 /* Creates a vector of four 16-bit values; W0 is least significant. */ |
1038 static __inline __m64 | 1098 static __inline __m64 |
1039 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) | 1099 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) |
1040 { | 1100 { |
1041 unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2; | 1101 unsigned int __i1 = (unsigned short) __w3 << 16 | (unsigned short) __w2; |
1042 unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0; | 1102 unsigned int __i0 = (unsigned short) __w1 << 16 | (unsigned short) __w0; |
1103 | |
1043 return _mm_set_pi32 (__i1, __i0); | 1104 return _mm_set_pi32 (__i1, __i0); |
1044 | |
1045 } | 1105 } |
1046 | 1106 |
1047 /* Creates a vector of eight 8-bit values; B0 is least significant. */ | 1107 /* Creates a vector of eight 8-bit values; B0 is least significant. */ |
1048 static __inline __m64 | 1108 static __inline __m64 |
1049 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, | 1109 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, |
1106 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; | 1166 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; |
1107 unsigned int __i = __w << 16 | __w; | 1167 unsigned int __i = __w << 16 | __w; |
1108 return _mm_set1_pi32 (__i); | 1168 return _mm_set1_pi32 (__i); |
1109 } | 1169 } |
1110 | 1170 |
1111 /* Convert an integer to a __m64 object. */ | 1171 #ifdef __IWMMXT2__ |
1112 static __inline __m64 | 1172 static __inline __m64 |
1113 _m_from_int (int __a) | 1173 _mm_abs_pi8 (__m64 m1) |
1114 { | 1174 { |
1115 return (__m64)__a; | 1175 return (__m64) __builtin_arm_wabsb ((__v8qi)m1); |
1116 } | 1176 } |
1117 | 1177 |
1178 static __inline __m64 | |
1179 _mm_abs_pi16 (__m64 m1) | |
1180 { | |
1181 return (__m64) __builtin_arm_wabsh ((__v4hi)m1); | |
1182 | |
1183 } | |
1184 | |
1185 static __inline __m64 | |
1186 _mm_abs_pi32 (__m64 m1) | |
1187 { | |
1188 return (__m64) __builtin_arm_wabsw ((__v2si)m1); | |
1189 | |
1190 } | |
1191 | |
1192 static __inline __m64 | |
1193 _mm_addsubhx_pi16 (__m64 a, __m64 b) | |
1194 { | |
1195 return (__m64) __builtin_arm_waddsubhx ((__v4hi)a, (__v4hi)b); | |
1196 } | |
1197 | |
1198 static __inline __m64 | |
1199 _mm_absdiff_pu8 (__m64 a, __m64 b) | |
1200 { | |
1201 return (__m64) __builtin_arm_wabsdiffb ((__v8qi)a, (__v8qi)b); | |
1202 } | |
1203 | |
1204 static __inline __m64 | |
1205 _mm_absdiff_pu16 (__m64 a, __m64 b) | |
1206 { | |
1207 return (__m64) __builtin_arm_wabsdiffh ((__v4hi)a, (__v4hi)b); | |
1208 } | |
1209 | |
1210 static __inline __m64 | |
1211 _mm_absdiff_pu32 (__m64 a, __m64 b) | |
1212 { | |
1213 return (__m64) __builtin_arm_wabsdiffw ((__v2si)a, (__v2si)b); | |
1214 } | |
1215 | |
1216 static __inline __m64 | |
1217 _mm_addc_pu16 (__m64 a, __m64 b) | |
1218 { | |
1219 __m64 result; | |
1220 __asm__ __volatile__ ("waddhc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); | |
1221 return result; | |
1222 } | |
1223 | |
1224 static __inline __m64 | |
1225 _mm_addc_pu32 (__m64 a, __m64 b) | |
1226 { | |
1227 __m64 result; | |
1228 __asm__ __volatile__ ("waddwc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); | |
1229 return result; | |
1230 } | |
1231 | |
1232 static __inline __m64 | |
1233 _mm_avg4_pu8 (__m64 a, __m64 b) | |
1234 { | |
1235 return (__m64) __builtin_arm_wavg4 ((__v8qi)a, (__v8qi)b); | |
1236 } | |
1237 | |
1238 static __inline __m64 | |
1239 _mm_avg4r_pu8 (__m64 a, __m64 b) | |
1240 { | |
1241 return (__m64) __builtin_arm_wavg4r ((__v8qi)a, (__v8qi)b); | |
1242 } | |
1243 | |
1244 static __inline __m64 | |
1245 _mm_maddx_pi16 (__m64 a, __m64 b) | |
1246 { | |
1247 return (__m64) __builtin_arm_wmaddsx ((__v4hi)a, (__v4hi)b); | |
1248 } | |
1249 | |
1250 static __inline __m64 | |
1251 _mm_maddx_pu16 (__m64 a, __m64 b) | |
1252 { | |
1253 return (__m64) __builtin_arm_wmaddux ((__v4hi)a, (__v4hi)b); | |
1254 } | |
1255 | |
1256 static __inline __m64 | |
1257 _mm_msub_pi16 (__m64 a, __m64 b) | |
1258 { | |
1259 return (__m64) __builtin_arm_wmaddsn ((__v4hi)a, (__v4hi)b); | |
1260 } | |
1261 | |
1262 static __inline __m64 | |
1263 _mm_msub_pu16 (__m64 a, __m64 b) | |
1264 { | |
1265 return (__m64) __builtin_arm_wmaddun ((__v4hi)a, (__v4hi)b); | |
1266 } | |
1267 | |
1268 static __inline __m64 | |
1269 _mm_mulhi_pi32 (__m64 a, __m64 b) | |
1270 { | |
1271 return (__m64) __builtin_arm_wmulwsm ((__v2si)a, (__v2si)b); | |
1272 } | |
1273 | |
1274 static __inline __m64 | |
1275 _mm_mulhi_pu32 (__m64 a, __m64 b) | |
1276 { | |
1277 return (__m64) __builtin_arm_wmulwum ((__v2si)a, (__v2si)b); | |
1278 } | |
1279 | |
1280 static __inline __m64 | |
1281 _mm_mulhir_pi16 (__m64 a, __m64 b) | |
1282 { | |
1283 return (__m64) __builtin_arm_wmulsmr ((__v4hi)a, (__v4hi)b); | |
1284 } | |
1285 | |
1286 static __inline __m64 | |
1287 _mm_mulhir_pi32 (__m64 a, __m64 b) | |
1288 { | |
1289 return (__m64) __builtin_arm_wmulwsmr ((__v2si)a, (__v2si)b); | |
1290 } | |
1291 | |
1292 static __inline __m64 | |
1293 _mm_mulhir_pu16 (__m64 a, __m64 b) | |
1294 { | |
1295 return (__m64) __builtin_arm_wmulumr ((__v4hi)a, (__v4hi)b); | |
1296 } | |
1297 | |
1298 static __inline __m64 | |
1299 _mm_mulhir_pu32 (__m64 a, __m64 b) | |
1300 { | |
1301 return (__m64) __builtin_arm_wmulwumr ((__v2si)a, (__v2si)b); | |
1302 } | |
1303 | |
1304 static __inline __m64 | |
1305 _mm_mullo_pi32 (__m64 a, __m64 b) | |
1306 { | |
1307 return (__m64) __builtin_arm_wmulwl ((__v2si)a, (__v2si)b); | |
1308 } | |
1309 | |
1310 static __inline __m64 | |
1311 _mm_qmulm_pi16 (__m64 a, __m64 b) | |
1312 { | |
1313 return (__m64) __builtin_arm_wqmulm ((__v4hi)a, (__v4hi)b); | |
1314 } | |
1315 | |
1316 static __inline __m64 | |
1317 _mm_qmulm_pi32 (__m64 a, __m64 b) | |
1318 { | |
1319 return (__m64) __builtin_arm_wqmulwm ((__v2si)a, (__v2si)b); | |
1320 } | |
1321 | |
1322 static __inline __m64 | |
1323 _mm_qmulmr_pi16 (__m64 a, __m64 b) | |
1324 { | |
1325 return (__m64) __builtin_arm_wqmulmr ((__v4hi)a, (__v4hi)b); | |
1326 } | |
1327 | |
1328 static __inline __m64 | |
1329 _mm_qmulmr_pi32 (__m64 a, __m64 b) | |
1330 { | |
1331 return (__m64) __builtin_arm_wqmulwmr ((__v2si)a, (__v2si)b); | |
1332 } | |
1333 | |
1334 static __inline __m64 | |
1335 _mm_subaddhx_pi16 (__m64 a, __m64 b) | |
1336 { | |
1337 return (__m64) __builtin_arm_wsubaddhx ((__v4hi)a, (__v4hi)b); | |
1338 } | |
1339 | |
1340 static __inline __m64 | |
1341 _mm_addbhusl_pu8 (__m64 a, __m64 b) | |
1342 { | |
1343 return (__m64) __builtin_arm_waddbhusl ((__v4hi)a, (__v8qi)b); | |
1344 } | |
1345 | |
1346 static __inline __m64 | |
1347 _mm_addbhusm_pu8 (__m64 a, __m64 b) | |
1348 { | |
1349 return (__m64) __builtin_arm_waddbhusm ((__v4hi)a, (__v8qi)b); | |
1350 } | |
1351 | |
1352 #define _mm_qmiabb_pi32(acc, m1, m2) \ | |
1353 ({\ | |
1354 __m64 _acc = acc;\ | |
1355 __m64 _m1 = m1;\ | |
1356 __m64 _m2 = m2;\ | |
1357 _acc = (__m64) __builtin_arm_wqmiabb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1358 _acc;\ | |
1359 }) | |
1360 | |
1361 #define _mm_qmiabbn_pi32(acc, m1, m2) \ | |
1362 ({\ | |
1363 __m64 _acc = acc;\ | |
1364 __m64 _m1 = m1;\ | |
1365 __m64 _m2 = m2;\ | |
1366 _acc = (__m64) __builtin_arm_wqmiabbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1367 _acc;\ | |
1368 }) | |
1369 | |
1370 #define _mm_qmiabt_pi32(acc, m1, m2) \ | |
1371 ({\ | |
1372 __m64 _acc = acc;\ | |
1373 __m64 _m1 = m1;\ | |
1374 __m64 _m2 = m2;\ | |
1375 _acc = (__m64) __builtin_arm_wqmiabt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1376 _acc;\ | |
1377 }) | |
1378 | |
1379 #define _mm_qmiabtn_pi32(acc, m1, m2) \ | |
1380 ({\ | |
1381 __m64 _acc=acc;\ | |
1382 __m64 _m1=m1;\ | |
1383 __m64 _m2=m2;\ | |
1384 _acc = (__m64) __builtin_arm_wqmiabtn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1385 _acc;\ | |
1386 }) | |
1387 | |
1388 #define _mm_qmiatb_pi32(acc, m1, m2) \ | |
1389 ({\ | |
1390 __m64 _acc = acc;\ | |
1391 __m64 _m1 = m1;\ | |
1392 __m64 _m2 = m2;\ | |
1393 _acc = (__m64) __builtin_arm_wqmiatb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1394 _acc;\ | |
1395 }) | |
1396 | |
1397 #define _mm_qmiatbn_pi32(acc, m1, m2) \ | |
1398 ({\ | |
1399 __m64 _acc = acc;\ | |
1400 __m64 _m1 = m1;\ | |
1401 __m64 _m2 = m2;\ | |
1402 _acc = (__m64) __builtin_arm_wqmiatbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1403 _acc;\ | |
1404 }) | |
1405 | |
1406 #define _mm_qmiatt_pi32(acc, m1, m2) \ | |
1407 ({\ | |
1408 __m64 _acc = acc;\ | |
1409 __m64 _m1 = m1;\ | |
1410 __m64 _m2 = m2;\ | |
1411 _acc = (__m64) __builtin_arm_wqmiatt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1412 _acc;\ | |
1413 }) | |
1414 | |
1415 #define _mm_qmiattn_pi32(acc, m1, m2) \ | |
1416 ({\ | |
1417 __m64 _acc = acc;\ | |
1418 __m64 _m1 = m1;\ | |
1419 __m64 _m2 = m2;\ | |
1420 _acc = (__m64) __builtin_arm_wqmiattn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1421 _acc;\ | |
1422 }) | |
1423 | |
1424 #define _mm_wmiabb_si64(acc, m1, m2) \ | |
1425 ({\ | |
1426 __m64 _acc = acc;\ | |
1427 __m64 _m1 = m1;\ | |
1428 __m64 _m2 = m2;\ | |
1429 _acc = (__m64) __builtin_arm_wmiabb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1430 _acc;\ | |
1431 }) | |
1432 | |
1433 #define _mm_wmiabbn_si64(acc, m1, m2) \ | |
1434 ({\ | |
1435 __m64 _acc = acc;\ | |
1436 __m64 _m1 = m1;\ | |
1437 __m64 _m2 = m2;\ | |
1438 _acc = (__m64) __builtin_arm_wmiabbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1439 _acc;\ | |
1440 }) | |
1441 | |
1442 #define _mm_wmiabt_si64(acc, m1, m2) \ | |
1443 ({\ | |
1444 __m64 _acc = acc;\ | |
1445 __m64 _m1 = m1;\ | |
1446 __m64 _m2 = m2;\ | |
1447 _acc = (__m64) __builtin_arm_wmiabt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1448 _acc;\ | |
1449 }) | |
1450 | |
1451 #define _mm_wmiabtn_si64(acc, m1, m2) \ | |
1452 ({\ | |
1453 __m64 _acc = acc;\ | |
1454 __m64 _m1 = m1;\ | |
1455 __m64 _m2 = m2;\ | |
1456 _acc = (__m64) __builtin_arm_wmiabtn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1457 _acc;\ | |
1458 }) | |
1459 | |
1460 #define _mm_wmiatb_si64(acc, m1, m2) \ | |
1461 ({\ | |
1462 __m64 _acc = acc;\ | |
1463 __m64 _m1 = m1;\ | |
1464 __m64 _m2 = m2;\ | |
1465 _acc = (__m64) __builtin_arm_wmiatb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1466 _acc;\ | |
1467 }) | |
1468 | |
1469 #define _mm_wmiatbn_si64(acc, m1, m2) \ | |
1470 ({\ | |
1471 __m64 _acc = acc;\ | |
1472 __m64 _m1 = m1;\ | |
1473 __m64 _m2 = m2;\ | |
1474 _acc = (__m64) __builtin_arm_wmiatbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1475 _acc;\ | |
1476 }) | |
1477 | |
1478 #define _mm_wmiatt_si64(acc, m1, m2) \ | |
1479 ({\ | |
1480 __m64 _acc = acc;\ | |
1481 __m64 _m1 = m1;\ | |
1482 __m64 _m2 = m2;\ | |
1483 _acc = (__m64) __builtin_arm_wmiatt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1484 _acc;\ | |
1485 }) | |
1486 | |
1487 #define _mm_wmiattn_si64(acc, m1, m2) \ | |
1488 ({\ | |
1489 __m64 _acc = acc;\ | |
1490 __m64 _m1 = m1;\ | |
1491 __m64 _m2 = m2;\ | |
1492 _acc = (__m64) __builtin_arm_wmiattn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ | |
1493 _acc;\ | |
1494 }) | |
1495 | |
1496 #define _mm_wmiawbb_si64(acc, m1, m2) \ | |
1497 ({\ | |
1498 __m64 _acc = acc;\ | |
1499 __m64 _m1 = m1;\ | |
1500 __m64 _m2 = m2;\ | |
1501 _acc = (__m64) __builtin_arm_wmiawbb (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1502 _acc;\ | |
1503 }) | |
1504 | |
1505 #define _mm_wmiawbbn_si64(acc, m1, m2) \ | |
1506 ({\ | |
1507 __m64 _acc = acc;\ | |
1508 __m64 _m1 = m1;\ | |
1509 __m64 _m2 = m2;\ | |
1510 _acc = (__m64) __builtin_arm_wmiawbbn (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1511 _acc;\ | |
1512 }) | |
1513 | |
1514 #define _mm_wmiawbt_si64(acc, m1, m2) \ | |
1515 ({\ | |
1516 __m64 _acc = acc;\ | |
1517 __m64 _m1 = m1;\ | |
1518 __m64 _m2 = m2;\ | |
1519 _acc = (__m64) __builtin_arm_wmiawbt (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1520 _acc;\ | |
1521 }) | |
1522 | |
1523 #define _mm_wmiawbtn_si64(acc, m1, m2) \ | |
1524 ({\ | |
1525 __m64 _acc = acc;\ | |
1526 __m64 _m1 = m1;\ | |
1527 __m64 _m2 = m2;\ | |
1528 _acc = (__m64) __builtin_arm_wmiawbtn (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1529 _acc;\ | |
1530 }) | |
1531 | |
1532 #define _mm_wmiawtb_si64(acc, m1, m2) \ | |
1533 ({\ | |
1534 __m64 _acc = acc;\ | |
1535 __m64 _m1 = m1;\ | |
1536 __m64 _m2 = m2;\ | |
1537 _acc = (__m64) __builtin_arm_wmiawtb (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1538 _acc;\ | |
1539 }) | |
1540 | |
1541 #define _mm_wmiawtbn_si64(acc, m1, m2) \ | |
1542 ({\ | |
1543 __m64 _acc = acc;\ | |
1544 __m64 _m1 = m1;\ | |
1545 __m64 _m2 = m2;\ | |
1546 _acc = (__m64) __builtin_arm_wmiawtbn (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1547 _acc;\ | |
1548 }) | |
1549 | |
1550 #define _mm_wmiawtt_si64(acc, m1, m2) \ | |
1551 ({\ | |
1552 __m64 _acc = acc;\ | |
1553 __m64 _m1 = m1;\ | |
1554 __m64 _m2 = m2;\ | |
1555 _acc = (__m64) __builtin_arm_wmiawtt (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1556 _acc;\ | |
1557 }) | |
1558 | |
1559 #define _mm_wmiawttn_si64(acc, m1, m2) \ | |
1560 ({\ | |
1561 __m64 _acc = acc;\ | |
1562 __m64 _m1 = m1;\ | |
1563 __m64 _m2 = m2;\ | |
1564 _acc = (__m64) __builtin_arm_wmiawttn (_acc, (__v2si)_m1, (__v2si)_m2);\ | |
1565 _acc;\ | |
1566 }) | |
1567 | |
1568 /* The third argument should be an immediate. */ | |
1569 #define _mm_merge_si64(a, b, n) \ | |
1570 ({\ | |
1571 __m64 result;\ | |
1572 result = (__m64) __builtin_arm_wmerge ((__m64) (a), (__m64) (b), (n));\ | |
1573 result;\ | |
1574 }) | |
1575 #endif /* __IWMMXT2__ */ | |
1576 | |
1577 static __inline __m64 | |
1578 _mm_alignr0_si64 (__m64 a, __m64 b) | |
1579 { | |
1580 return (__m64) __builtin_arm_walignr0 ((__v8qi) a, (__v8qi) b); | |
1581 } | |
1582 | |
1583 static __inline __m64 | |
1584 _mm_alignr1_si64 (__m64 a, __m64 b) | |
1585 { | |
1586 return (__m64) __builtin_arm_walignr1 ((__v8qi) a, (__v8qi) b); | |
1587 } | |
1588 | |
1589 static __inline __m64 | |
1590 _mm_alignr2_si64 (__m64 a, __m64 b) | |
1591 { | |
1592 return (__m64) __builtin_arm_walignr2 ((__v8qi) a, (__v8qi) b); | |
1593 } | |
1594 | |
1595 static __inline __m64 | |
1596 _mm_alignr3_si64 (__m64 a, __m64 b) | |
1597 { | |
1598 return (__m64) __builtin_arm_walignr3 ((__v8qi) a, (__v8qi) b); | |
1599 } | |
1600 | |
1601 static __inline void | |
1602 _mm_tandcb () | |
1603 { | |
1604 __asm __volatile ("tandcb r15"); | |
1605 } | |
1606 | |
1607 static __inline void | |
1608 _mm_tandch () | |
1609 { | |
1610 __asm __volatile ("tandch r15"); | |
1611 } | |
1612 | |
1613 static __inline void | |
1614 _mm_tandcw () | |
1615 { | |
1616 __asm __volatile ("tandcw r15"); | |
1617 } | |
1618 | |
1619 #define _mm_textrcb(n) \ | |
1620 ({\ | |
1621 __asm__ __volatile__ (\ | |
1622 "textrcb r15, %0" : : "i" (n));\ | |
1623 }) | |
1624 | |
1625 #define _mm_textrch(n) \ | |
1626 ({\ | |
1627 __asm__ __volatile__ (\ | |
1628 "textrch r15, %0" : : "i" (n));\ | |
1629 }) | |
1630 | |
1631 #define _mm_textrcw(n) \ | |
1632 ({\ | |
1633 __asm__ __volatile__ (\ | |
1634 "textrcw r15, %0" : : "i" (n));\ | |
1635 }) | |
1636 | |
1637 static __inline void | |
1638 _mm_torcb () | |
1639 { | |
1640 __asm __volatile ("torcb r15"); | |
1641 } | |
1642 | |
1643 static __inline void | |
1644 _mm_torch () | |
1645 { | |
1646 __asm __volatile ("torch r15"); | |
1647 } | |
1648 | |
1649 static __inline void | |
1650 _mm_torcw () | |
1651 { | |
1652 __asm __volatile ("torcw r15"); | |
1653 } | |
1654 | |
1655 #ifdef __IWMMXT2__ | |
1656 static __inline void | |
1657 _mm_torvscb () | |
1658 { | |
1659 __asm __volatile ("torvscb r15"); | |
1660 } | |
1661 | |
1662 static __inline void | |
1663 _mm_torvsch () | |
1664 { | |
1665 __asm __volatile ("torvsch r15"); | |
1666 } | |
1667 | |
1668 static __inline void | |
1669 _mm_torvscw () | |
1670 { | |
1671 __asm __volatile ("torvscw r15"); | |
1672 } | |
1673 #endif /* __IWMMXT2__ */ | |
1674 | |
1675 static __inline __m64 | |
1676 _mm_tbcst_pi8 (int value) | |
1677 { | |
1678 return (__m64) __builtin_arm_tbcstb ((signed char) value); | |
1679 } | |
1680 | |
1681 static __inline __m64 | |
1682 _mm_tbcst_pi16 (int value) | |
1683 { | |
1684 return (__m64) __builtin_arm_tbcsth ((short) value); | |
1685 } | |
1686 | |
1687 static __inline __m64 | |
1688 _mm_tbcst_pi32 (int value) | |
1689 { | |
1690 return (__m64) __builtin_arm_tbcstw (value); | |
1691 } | |
1692 | |
1693 #define _m_empty _mm_empty | |
1118 #define _m_packsswb _mm_packs_pi16 | 1694 #define _m_packsswb _mm_packs_pi16 |
1119 #define _m_packssdw _mm_packs_pi32 | 1695 #define _m_packssdw _mm_packs_pi32 |
1120 #define _m_packuswb _mm_packs_pu16 | 1696 #define _m_packuswb _mm_packs_pu16 |
1121 #define _m_packusdw _mm_packs_pu32 | 1697 #define _m_packusdw _mm_packs_pu32 |
1122 #define _m_packssqd _mm_packs_pi64 | 1698 #define _m_packssqd _mm_packs_pi64 |
1248 #define _m_psadzbw _mm_sadz_pu8 | 1824 #define _m_psadzbw _mm_sadz_pu8 |
1249 #define _m_psadzwd _mm_sadz_pu16 | 1825 #define _m_psadzwd _mm_sadz_pu16 |
1250 #define _m_paligniq _mm_align_si64 | 1826 #define _m_paligniq _mm_align_si64 |
1251 #define _m_cvt_si2pi _mm_cvtsi64_m64 | 1827 #define _m_cvt_si2pi _mm_cvtsi64_m64 |
1252 #define _m_cvt_pi2si _mm_cvtm64_si64 | 1828 #define _m_cvt_pi2si _mm_cvtm64_si64 |
1829 #define _m_from_int _mm_cvtsi32_si64 | |
1830 #define _m_to_int _mm_cvtsi64_si32 | |
1831 | |
1832 #if defined __cplusplus | |
1833 }; /* End "C" */ | |
1834 #endif /* __cplusplus */ | |
1253 | 1835 |
1254 #endif /* _MMINTRIN_H_INCLUDED */ | 1836 #endif /* _MMINTRIN_H_INCLUDED */ |