Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/rs6000/mmintrin.h @ 145:1830386684a0
gcc-9.2.0
author: anatofuz
date: Thu, 13 Feb 2020 11:34:05 +0900
parents: 84e7813d76e9
children: (none)
comparison legend: equal / deleted / inserted / replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Copyright (C) 2002-2018 Free Software Foundation, Inc. | 1 /* Copyright (C) 2002-2020 Free Software Foundation, Inc. |
2 | 2 |
3 This file is part of GCC. | 3 This file is part of GCC. |
4 | 4 |
5 GCC is free software; you can redistribute it and/or modify | 5 GCC is free software; you can redistribute it and/or modify |
6 it under the terms of the GNU General Public License as published by | 6 it under the terms of the GNU General Public License as published by |
110 _m_to_int (__m64 __i) | 110 _m_to_int (__m64 __i) |
111 { | 111 { |
112 return _mm_cvtsi64_si32 (__i); | 112 return _mm_cvtsi64_si32 (__i); |
113 } | 113 } |
114 | 114 |
115 #ifdef __powerpc64__ | |
116 /* Convert I to a __m64 object. */ | 115 /* Convert I to a __m64 object. */ |
117 | 116 |
118 /* Intel intrinsic. */ | 117 /* Intel intrinsic. */ |
119 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 118 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
120 _m_from_int64 (long long __i) | 119 _m_from_int64 (long long __i) |
171 _mm_packs_pi16 (__m64 __m1, __m64 __m2) | 170 _mm_packs_pi16 (__m64 __m1, __m64 __m2) |
172 { | 171 { |
173 __vector signed short vm1; | 172 __vector signed short vm1; |
174 __vector signed char vresult; | 173 __vector signed char vresult; |
175 | 174 |
176 vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1); | 175 vm1 = (__vector signed short) (__vector unsigned long long) |
177 vresult = vec_vpkshss (vm1, vm1); | 176 #ifdef __LITTLE_ENDIAN__ |
178 return (__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0); | 177 { __m1, __m2 }; |
178 #else | |
179 { __m2, __m1 }; | |
180 #endif | |
181 vresult = vec_packs (vm1, vm1); | |
182 return (__m64) ((__vector long long) vresult)[0]; | |
179 } | 183 } |
180 | 184 |
181 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 185 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
182 _m_packsswb (__m64 __m1, __m64 __m2) | 186 _m_packsswb (__m64 __m1, __m64 __m2) |
183 { | 187 { |
191 _mm_packs_pi32 (__m64 __m1, __m64 __m2) | 195 _mm_packs_pi32 (__m64 __m1, __m64 __m2) |
192 { | 196 { |
193 __vector signed int vm1; | 197 __vector signed int vm1; |
194 __vector signed short vresult; | 198 __vector signed short vresult; |
195 | 199 |
196 vm1 = (__vector signed int)__builtin_pack_vector_int128 (__m2, __m1); | 200 vm1 = (__vector signed int) (__vector unsigned long long) |
197 vresult = vec_vpkswss (vm1, vm1); | 201 #ifdef __LITTLE_ENDIAN__ |
198 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0)); | 202 { __m1, __m2 }; |
203 #else | |
204 { __m2, __m1 }; | |
205 #endif | |
206 vresult = vec_packs (vm1, vm1); | |
207 return (__m64) ((__vector long long) vresult)[0]; | |
199 } | 208 } |
200 | 209 |
201 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 210 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
202 _m_packssdw (__m64 __m1, __m64 __m2) | 211 _m_packssdw (__m64 __m1, __m64 __m2) |
203 { | 212 { |
208 the result, and the four 16-bit values from M2 into the upper four 8-bit | 217 the result, and the four 16-bit values from M2 into the upper four 8-bit |
209 values of the result, all with unsigned saturation. */ | 218 values of the result, all with unsigned saturation. */ |
210 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 219 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
211 _mm_packs_pu16 (__m64 __m1, __m64 __m2) | 220 _mm_packs_pu16 (__m64 __m1, __m64 __m2) |
212 { | 221 { |
213 __vector signed short vm1; | 222 __vector unsigned char r; |
214 __vector unsigned char vresult; | 223 __vector signed short vm1 = (__vector signed short) (__vector long long) |
215 | 224 #ifdef __LITTLE_ENDIAN__ |
216 vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1); | 225 { __m1, __m2 }; |
217 vresult = vec_vpkshus (vm1, vm1); | 226 #else |
218 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0)); | 227 { __m2, __m1 }; |
228 #endif | |
229 const __vector signed short __zero = { 0 }; | |
230 __vector __bool short __select = vec_cmplt (vm1, __zero); | |
231 r = vec_packs ((__vector unsigned short) vm1, (__vector unsigned short) vm1); | |
232 __vector __bool char packsel = vec_pack (__select, __select); | |
233 r = vec_sel (r, (const __vector unsigned char) __zero, packsel); | |
234 return (__m64) ((__vector long long) r)[0]; | |
219 } | 235 } |
220 | 236 |
221 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 237 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
222 _m_packuswb (__m64 __m1, __m64 __m2) | 238 _m_packuswb (__m64 __m1, __m64 __m2) |
223 { | 239 { |
234 __vector unsigned char a, b, c; | 250 __vector unsigned char a, b, c; |
235 | 251 |
236 a = (__vector unsigned char)vec_splats (__m1); | 252 a = (__vector unsigned char)vec_splats (__m1); |
237 b = (__vector unsigned char)vec_splats (__m2); | 253 b = (__vector unsigned char)vec_splats (__m2); |
238 c = vec_mergel (a, b); | 254 c = vec_mergel (a, b); |
239 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 255 return (__m64) ((__vector long long) c)[1]; |
240 #else | 256 #else |
241 __m64_union m1, m2, res; | 257 __m64_union m1, m2, res; |
242 | 258 |
243 m1.as_m64 = __m1; | 259 m1.as_m64 = __m1; |
244 m2.as_m64 = __m2; | 260 m2.as_m64 = __m2; |
315 __vector unsigned char a, b, c; | 331 __vector unsigned char a, b, c; |
316 | 332 |
317 a = (__vector unsigned char)vec_splats (__m1); | 333 a = (__vector unsigned char)vec_splats (__m1); |
318 b = (__vector unsigned char)vec_splats (__m2); | 334 b = (__vector unsigned char)vec_splats (__m2); |
319 c = vec_mergel (a, b); | 335 c = vec_mergel (a, b); |
320 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 1)); | 336 return (__m64) ((__vector long long) c)[0]; |
321 #else | 337 #else |
322 __m64_union m1, m2, res; | 338 __m64_union m1, m2, res; |
323 | 339 |
324 m1.as_m64 = __m1; | 340 m1.as_m64 = __m1; |
325 m2.as_m64 = __m2; | 341 m2.as_m64 = __m2; |
396 __vector signed char a, b, c; | 412 __vector signed char a, b, c; |
397 | 413 |
398 a = (__vector signed char)vec_splats (__m1); | 414 a = (__vector signed char)vec_splats (__m1); |
399 b = (__vector signed char)vec_splats (__m2); | 415 b = (__vector signed char)vec_splats (__m2); |
400 c = vec_add (a, b); | 416 c = vec_add (a, b); |
401 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 417 return (__m64) ((__vector long long) c)[0]; |
402 #else | 418 #else |
403 __m64_union m1, m2, res; | 419 __m64_union m1, m2, res; |
404 | 420 |
405 m1.as_m64 = __m1; | 421 m1.as_m64 = __m1; |
406 m2.as_m64 = __m2; | 422 m2.as_m64 = __m2; |
432 __vector signed short a, b, c; | 448 __vector signed short a, b, c; |
433 | 449 |
434 a = (__vector signed short)vec_splats (__m1); | 450 a = (__vector signed short)vec_splats (__m1); |
435 b = (__vector signed short)vec_splats (__m2); | 451 b = (__vector signed short)vec_splats (__m2); |
436 c = vec_add (a, b); | 452 c = vec_add (a, b); |
437 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 453 return (__m64) ((__vector long long) c)[0]; |
438 #else | 454 #else |
439 __m64_union m1, m2, res; | 455 __m64_union m1, m2, res; |
440 | 456 |
441 m1.as_m64 = __m1; | 457 m1.as_m64 = __m1; |
442 m2.as_m64 = __m2; | 458 m2.as_m64 = __m2; |
464 __vector signed int a, b, c; | 480 __vector signed int a, b, c; |
465 | 481 |
466 a = (__vector signed int)vec_splats (__m1); | 482 a = (__vector signed int)vec_splats (__m1); |
467 b = (__vector signed int)vec_splats (__m2); | 483 b = (__vector signed int)vec_splats (__m2); |
468 c = vec_add (a, b); | 484 c = vec_add (a, b); |
469 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 485 return (__m64) ((__vector long long) c)[0]; |
470 #else | 486 #else |
471 __m64_union m1, m2, res; | 487 __m64_union m1, m2, res; |
472 | 488 |
473 m1.as_m64 = __m1; | 489 m1.as_m64 = __m1; |
474 m2.as_m64 = __m2; | 490 m2.as_m64 = __m2; |
494 __vector signed char a, b, c; | 510 __vector signed char a, b, c; |
495 | 511 |
496 a = (__vector signed char)vec_splats (__m1); | 512 a = (__vector signed char)vec_splats (__m1); |
497 b = (__vector signed char)vec_splats (__m2); | 513 b = (__vector signed char)vec_splats (__m2); |
498 c = vec_sub (a, b); | 514 c = vec_sub (a, b); |
499 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 515 return (__m64) ((__vector long long) c)[0]; |
500 #else | 516 #else |
501 __m64_union m1, m2, res; | 517 __m64_union m1, m2, res; |
502 | 518 |
503 m1.as_m64 = __m1; | 519 m1.as_m64 = __m1; |
504 m2.as_m64 = __m2; | 520 m2.as_m64 = __m2; |
530 __vector signed short a, b, c; | 546 __vector signed short a, b, c; |
531 | 547 |
532 a = (__vector signed short)vec_splats (__m1); | 548 a = (__vector signed short)vec_splats (__m1); |
533 b = (__vector signed short)vec_splats (__m2); | 549 b = (__vector signed short)vec_splats (__m2); |
534 c = vec_sub (a, b); | 550 c = vec_sub (a, b); |
535 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 551 return (__m64) ((__vector long long) c)[0]; |
536 #else | 552 #else |
537 __m64_union m1, m2, res; | 553 __m64_union m1, m2, res; |
538 | 554 |
539 m1.as_m64 = __m1; | 555 m1.as_m64 = __m1; |
540 m2.as_m64 = __m2; | 556 m2.as_m64 = __m2; |
562 __vector signed int a, b, c; | 578 __vector signed int a, b, c; |
563 | 579 |
564 a = (__vector signed int)vec_splats (__m1); | 580 a = (__vector signed int)vec_splats (__m1); |
565 b = (__vector signed int)vec_splats (__m2); | 581 b = (__vector signed int)vec_splats (__m2); |
566 c = vec_sub (a, b); | 582 c = vec_sub (a, b); |
567 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 583 return (__m64) ((__vector long long) c)[0]; |
568 #else | 584 #else |
569 __m64_union m1, m2, res; | 585 __m64_union m1, m2, res; |
570 | 586 |
571 m1.as_m64 = __m1; | 587 m1.as_m64 = __m1; |
572 m2.as_m64 = __m2; | 588 m2.as_m64 = __m2; |
579 } | 595 } |
580 | 596 |
581 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 597 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
582 _m_psubd (__m64 __m1, __m64 __m2) | 598 _m_psubd (__m64 __m1, __m64 __m2) |
583 { | 599 { |
584 return _mm_add_pi32 (__m1, __m2); | 600 return _mm_sub_pi32 (__m1, __m2); |
585 } | 601 } |
586 | 602 |
587 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 603 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
588 _mm_add_si64 (__m64 __m1, __m64 __m2) | 604 _mm_add_si64 (__m64 __m1, __m64 __m2) |
589 { | 605 { |
709 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the | 725 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the |
710 test is true and zero if false. */ | 726 test is true and zero if false. */ |
711 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 727 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
712 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) | 728 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) |
713 { | 729 { |
714 #ifdef _ARCH_PWR6 | 730 #if defined(_ARCH_PWR6) && defined(__powerpc64__) |
715 __m64 res; | 731 __m64 res; |
716 __asm__( | 732 __asm__( |
717 "cmpb %0,%1,%2;\n" | 733 "cmpb %0,%1,%2;\n" |
718 : "=r" (res) | 734 : "=r" (res) |
719 : "r" (__m1), | 735 : "r" (__m1), |
752 __vector signed char a, b, c; | 768 __vector signed char a, b, c; |
753 | 769 |
754 a = (__vector signed char)vec_splats (__m1); | 770 a = (__vector signed char)vec_splats (__m1); |
755 b = (__vector signed char)vec_splats (__m2); | 771 b = (__vector signed char)vec_splats (__m2); |
756 c = (__vector signed char)vec_cmpgt (a, b); | 772 c = (__vector signed char)vec_cmpgt (a, b); |
757 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 773 return (__m64) ((__vector long long) c)[0]; |
758 #else | 774 #else |
759 __m64_union m1, m2, res; | 775 __m64_union m1, m2, res; |
760 | 776 |
761 m1.as_m64 = __m1; | 777 m1.as_m64 = __m1; |
762 m2.as_m64 = __m2; | 778 m2.as_m64 = __m2; |
789 __vector signed short a, b, c; | 805 __vector signed short a, b, c; |
790 | 806 |
791 a = (__vector signed short)vec_splats (__m1); | 807 a = (__vector signed short)vec_splats (__m1); |
792 b = (__vector signed short)vec_splats (__m2); | 808 b = (__vector signed short)vec_splats (__m2); |
793 c = (__vector signed short)vec_cmpeq (a, b); | 809 c = (__vector signed short)vec_cmpeq (a, b); |
794 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 810 return (__m64) ((__vector long long) c)[0]; |
795 #else | 811 #else |
796 __m64_union m1, m2, res; | 812 __m64_union m1, m2, res; |
797 | 813 |
798 m1.as_m64 = __m1; | 814 m1.as_m64 = __m1; |
799 m2.as_m64 = __m2; | 815 m2.as_m64 = __m2; |
820 __vector signed short a, b, c; | 836 __vector signed short a, b, c; |
821 | 837 |
822 a = (__vector signed short)vec_splats (__m1); | 838 a = (__vector signed short)vec_splats (__m1); |
823 b = (__vector signed short)vec_splats (__m2); | 839 b = (__vector signed short)vec_splats (__m2); |
824 c = (__vector signed short)vec_cmpgt (a, b); | 840 c = (__vector signed short)vec_cmpgt (a, b); |
825 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 841 return (__m64) ((__vector long long) c)[0]; |
826 #else | 842 #else |
827 __m64_union m1, m2, res; | 843 __m64_union m1, m2, res; |
828 | 844 |
829 m1.as_m64 = __m1; | 845 m1.as_m64 = __m1; |
830 m2.as_m64 = __m2; | 846 m2.as_m64 = __m2; |
853 __vector signed int a, b, c; | 869 __vector signed int a, b, c; |
854 | 870 |
855 a = (__vector signed int)vec_splats (__m1); | 871 a = (__vector signed int)vec_splats (__m1); |
856 b = (__vector signed int)vec_splats (__m2); | 872 b = (__vector signed int)vec_splats (__m2); |
857 c = (__vector signed int)vec_cmpeq (a, b); | 873 c = (__vector signed int)vec_cmpeq (a, b); |
858 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 874 return (__m64) ((__vector long long) c)[0]; |
859 #else | 875 #else |
860 __m64_union m1, m2, res; | 876 __m64_union m1, m2, res; |
861 | 877 |
862 m1.as_m64 = __m1; | 878 m1.as_m64 = __m1; |
863 m2.as_m64 = __m2; | 879 m2.as_m64 = __m2; |
882 __vector signed int a, b, c; | 898 __vector signed int a, b, c; |
883 | 899 |
884 a = (__vector signed int)vec_splats (__m1); | 900 a = (__vector signed int)vec_splats (__m1); |
885 b = (__vector signed int)vec_splats (__m2); | 901 b = (__vector signed int)vec_splats (__m2); |
886 c = (__vector signed int)vec_cmpgt (a, b); | 902 c = (__vector signed int)vec_cmpgt (a, b); |
887 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 903 return (__m64) ((__vector long long) c)[0]; |
888 #else | 904 #else |
889 __m64_union m1, m2, res; | 905 __m64_union m1, m2, res; |
890 | 906 |
891 m1.as_m64 = __m1; | 907 m1.as_m64 = __m1; |
892 m2.as_m64 = __m2; | 908 m2.as_m64 = __m2; |
913 __vector signed char a, b, c; | 929 __vector signed char a, b, c; |
914 | 930 |
915 a = (__vector signed char)vec_splats (__m1); | 931 a = (__vector signed char)vec_splats (__m1); |
916 b = (__vector signed char)vec_splats (__m2); | 932 b = (__vector signed char)vec_splats (__m2); |
917 c = vec_adds (a, b); | 933 c = vec_adds (a, b); |
918 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 934 return (__m64) ((__vector long long) c)[0]; |
919 } | 935 } |
920 | 936 |
921 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 937 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
922 _m_paddsb (__m64 __m1, __m64 __m2) | 938 _m_paddsb (__m64 __m1, __m64 __m2) |
923 { | 939 { |
931 __vector signed short a, b, c; | 947 __vector signed short a, b, c; |
932 | 948 |
933 a = (__vector signed short)vec_splats (__m1); | 949 a = (__vector signed short)vec_splats (__m1); |
934 b = (__vector signed short)vec_splats (__m2); | 950 b = (__vector signed short)vec_splats (__m2); |
935 c = vec_adds (a, b); | 951 c = vec_adds (a, b); |
936 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 952 return (__m64) ((__vector long long) c)[0]; |
937 } | 953 } |
938 | 954 |
939 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 955 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
940 _m_paddsw (__m64 __m1, __m64 __m2) | 956 _m_paddsw (__m64 __m1, __m64 __m2) |
941 { | 957 { |
949 __vector unsigned char a, b, c; | 965 __vector unsigned char a, b, c; |
950 | 966 |
951 a = (__vector unsigned char)vec_splats (__m1); | 967 a = (__vector unsigned char)vec_splats (__m1); |
952 b = (__vector unsigned char)vec_splats (__m2); | 968 b = (__vector unsigned char)vec_splats (__m2); |
953 c = vec_adds (a, b); | 969 c = vec_adds (a, b); |
954 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 970 return (__m64) ((__vector long long) c)[0]; |
955 } | 971 } |
956 | 972 |
957 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 973 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
958 _m_paddusb (__m64 __m1, __m64 __m2) | 974 _m_paddusb (__m64 __m1, __m64 __m2) |
959 { | 975 { |
968 __vector unsigned short a, b, c; | 984 __vector unsigned short a, b, c; |
969 | 985 |
970 a = (__vector unsigned short)vec_splats (__m1); | 986 a = (__vector unsigned short)vec_splats (__m1); |
971 b = (__vector unsigned short)vec_splats (__m2); | 987 b = (__vector unsigned short)vec_splats (__m2); |
972 c = vec_adds (a, b); | 988 c = vec_adds (a, b); |
973 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 989 return (__m64) ((__vector long long) c)[0]; |
974 } | 990 } |
975 | 991 |
976 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 992 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
977 _m_paddusw (__m64 __m1, __m64 __m2) | 993 _m_paddusw (__m64 __m1, __m64 __m2) |
978 { | 994 { |
987 __vector signed char a, b, c; | 1003 __vector signed char a, b, c; |
988 | 1004 |
989 a = (__vector signed char)vec_splats (__m1); | 1005 a = (__vector signed char)vec_splats (__m1); |
990 b = (__vector signed char)vec_splats (__m2); | 1006 b = (__vector signed char)vec_splats (__m2); |
991 c = vec_subs (a, b); | 1007 c = vec_subs (a, b); |
992 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1008 return (__m64) ((__vector long long) c)[0]; |
993 } | 1009 } |
994 | 1010 |
995 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1011 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
996 _m_psubsb (__m64 __m1, __m64 __m2) | 1012 _m_psubsb (__m64 __m1, __m64 __m2) |
997 { | 1013 { |
1006 __vector signed short a, b, c; | 1022 __vector signed short a, b, c; |
1007 | 1023 |
1008 a = (__vector signed short)vec_splats (__m1); | 1024 a = (__vector signed short)vec_splats (__m1); |
1009 b = (__vector signed short)vec_splats (__m2); | 1025 b = (__vector signed short)vec_splats (__m2); |
1010 c = vec_subs (a, b); | 1026 c = vec_subs (a, b); |
1011 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1027 return (__m64) ((__vector long long) c)[0]; |
1012 } | 1028 } |
1013 | 1029 |
1014 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1030 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1015 _m_psubsw (__m64 __m1, __m64 __m2) | 1031 _m_psubsw (__m64 __m1, __m64 __m2) |
1016 { | 1032 { |
1025 __vector unsigned char a, b, c; | 1041 __vector unsigned char a, b, c; |
1026 | 1042 |
1027 a = (__vector unsigned char)vec_splats (__m1); | 1043 a = (__vector unsigned char)vec_splats (__m1); |
1028 b = (__vector unsigned char)vec_splats (__m2); | 1044 b = (__vector unsigned char)vec_splats (__m2); |
1029 c = vec_subs (a, b); | 1045 c = vec_subs (a, b); |
1030 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1046 return (__m64) ((__vector long long) c)[0]; |
1031 } | 1047 } |
1032 | 1048 |
1033 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1049 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1034 _m_psubusb (__m64 __m1, __m64 __m2) | 1050 _m_psubusb (__m64 __m1, __m64 __m2) |
1035 { | 1051 { |
1044 __vector unsigned short a, b, c; | 1060 __vector unsigned short a, b, c; |
1045 | 1061 |
1046 a = (__vector unsigned short)vec_splats (__m1); | 1062 a = (__vector unsigned short)vec_splats (__m1); |
1047 b = (__vector unsigned short)vec_splats (__m2); | 1063 b = (__vector unsigned short)vec_splats (__m2); |
1048 c = vec_subs (a, b); | 1064 c = vec_subs (a, b); |
1049 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1065 return (__m64) ((__vector long long) c)[0]; |
1050 } | 1066 } |
1051 | 1067 |
1052 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1068 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1053 _m_psubusw (__m64 __m1, __m64 __m2) | 1069 _m_psubusw (__m64 __m1, __m64 __m2) |
1054 { | 1070 { |
1066 __vector signed int zero = {0, 0, 0, 0}; | 1082 __vector signed int zero = {0, 0, 0, 0}; |
1067 | 1083 |
1068 a = (__vector signed short)vec_splats (__m1); | 1084 a = (__vector signed short)vec_splats (__m1); |
1069 b = (__vector signed short)vec_splats (__m2); | 1085 b = (__vector signed short)vec_splats (__m2); |
1070 c = vec_vmsumshm (a, b, zero); | 1086 c = vec_vmsumshm (a, b, zero); |
1071 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1087 return (__m64) ((__vector long long) c)[0]; |
1072 } | 1088 } |
1073 | 1089 |
1074 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1090 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1075 _m_pmaddwd (__m64 __m1, __m64 __m2) | 1091 _m_pmaddwd (__m64 __m1, __m64 __m2) |
1076 { | 1092 { |
1083 { | 1099 { |
1084 __vector signed short a, b; | 1100 __vector signed short a, b; |
1085 __vector signed short c; | 1101 __vector signed short c; |
1086 __vector signed int w0, w1; | 1102 __vector signed int w0, w1; |
1087 __vector unsigned char xform1 = { | 1103 __vector unsigned char xform1 = { |
1104 #ifdef __LITTLE_ENDIAN__ | |
1088 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, | 1105 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, |
1089 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F | 1106 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F |
1107 #else | |
1108 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, | |
1109 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15 | |
1110 #endif | |
1090 }; | 1111 }; |
1091 | 1112 |
1092 a = (__vector signed short)vec_splats (__m1); | 1113 a = (__vector signed short)vec_splats (__m1); |
1093 b = (__vector signed short)vec_splats (__m2); | 1114 b = (__vector signed short)vec_splats (__m2); |
1094 | 1115 |
1095 w0 = vec_vmulesh (a, b); | 1116 w0 = vec_vmulesh (a, b); |
1096 w1 = vec_vmulosh (a, b); | 1117 w1 = vec_vmulosh (a, b); |
1097 c = (__vector signed short)vec_perm (w0, w1, xform1); | 1118 c = (__vector signed short)vec_perm (w0, w1, xform1); |
1098 | 1119 |
1099 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1120 return (__m64) ((__vector long long) c)[0]; |
1100 } | 1121 } |
1101 | 1122 |
1102 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1123 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1103 _m_pmulhw (__m64 __m1, __m64 __m2) | 1124 _m_pmulhw (__m64 __m1, __m64 __m2) |
1104 { | 1125 { |
1113 __vector signed short a, b, c; | 1134 __vector signed short a, b, c; |
1114 | 1135 |
1115 a = (__vector signed short)vec_splats (__m1); | 1136 a = (__vector signed short)vec_splats (__m1); |
1116 b = (__vector signed short)vec_splats (__m2); | 1137 b = (__vector signed short)vec_splats (__m2); |
1117 c = a * b; | 1138 c = a * b; |
1118 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); | 1139 return (__m64) ((__vector long long) c)[0]; |
1119 } | 1140 } |
1120 | 1141 |
1121 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | 1142 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
1122 _m_pmullw (__m64 __m1, __m64 __m2) | 1143 _m_pmullw (__m64 __m1, __m64 __m2) |
1123 { | 1144 { |
1134 if (__count <= 15) | 1155 if (__count <= 15) |
1135 { | 1156 { |
1136 m = (__vector signed short)vec_splats (__m); | 1157 m = (__vector signed short)vec_splats (__m); |
1137 c = (__vector unsigned short)vec_splats ((unsigned short)__count); | 1158 c = (__vector unsigned short)vec_splats ((unsigned short)__count); |
1138 r = vec_sl (m, (__vector unsigned short)c); | 1159 r = vec_sl (m, (__vector unsigned short)c); |
1139 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); | 1160 return (__m64) ((__vector long long) r)[0]; |
1140 } | 1161 } |
1141 else | 1162 else |
1142 return (0); | 1163 return (0); |
1143 } | 1164 } |
1144 | 1165 |
1203 if (__count <= 15) | 1224 if (__count <= 15) |
1204 { | 1225 { |
1205 m = (__vector signed short)vec_splats (__m); | 1226 m = (__vector signed short)vec_splats (__m); |
1206 c = (__vector unsigned short)vec_splats ((unsigned short)__count); | 1227 c = (__vector unsigned short)vec_splats ((unsigned short)__count); |
1207 r = vec_sra (m, (__vector unsigned short)c); | 1228 r = vec_sra (m, (__vector unsigned short)c); |
1208 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); | 1229 return (__m64) ((__vector long long) r)[0]; |
1209 } | 1230 } |
1210 else | 1231 else |
1211 return (0); | 1232 return (0); |
1212 } | 1233 } |
1213 | 1234 |
1272 if (__count <= 15) | 1293 if (__count <= 15) |
1273 { | 1294 { |
1274 m = (__vector unsigned short)vec_splats (__m); | 1295 m = (__vector unsigned short)vec_splats (__m); |
1275 c = (__vector unsigned short)vec_splats ((unsigned short)__count); | 1296 c = (__vector unsigned short)vec_splats ((unsigned short)__count); |
1276 r = vec_sr (m, (__vector unsigned short)c); | 1297 r = vec_sr (m, (__vector unsigned short)c); |
1277 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); | 1298 return (__m64) ((__vector long long) r)[0]; |
1278 } | 1299 } |
1279 else | 1300 else |
1280 return (0); | 1301 return (0); |
1281 } | 1302 } |
1282 | 1303 |
1415 { | 1436 { |
1416 #if _ARCH_PWR9 | 1437 #if _ARCH_PWR9 |
1417 __vector signed short w; | 1438 __vector signed short w; |
1418 | 1439 |
1419 w = (__vector signed short)vec_splats (__w); | 1440 w = (__vector signed short)vec_splats (__w); |
1420 return (__builtin_unpack_vector_int128 ((__vector __int128)w, 0)); | 1441 return (__m64) ((__vector long long) w)[0]; |
1421 #else | 1442 #else |
1422 __m64_union res; | 1443 __m64_union res; |
1423 | 1444 |
1424 res.as_short[0] = __w; | 1445 res.as_short[0] = __w; |
1425 res.as_short[1] = __w; | 1446 res.as_short[1] = __w; |
1435 { | 1456 { |
1436 #if _ARCH_PWR8 | 1457 #if _ARCH_PWR8 |
1437 __vector signed char b; | 1458 __vector signed char b; |
1438 | 1459 |
1439 b = (__vector signed char)vec_splats (__b); | 1460 b = (__vector signed char)vec_splats (__b); |
1440 return (__builtin_unpack_vector_int128 ((__vector __int128)b, 0)); | 1461 return (__m64) ((__vector long long) b)[0]; |
1441 #else | 1462 #else |
1442 __m64_union res; | 1463 __m64_union res; |
1443 | 1464 |
1444 res.as_char[0] = __b; | 1465 res.as_char[0] = __b; |
1445 res.as_char[1] = __b; | 1466 res.as_char[1] = __b; |
1450 res.as_char[6] = __b; | 1471 res.as_char[6] = __b; |
1451 res.as_char[7] = __b; | 1472 res.as_char[7] = __b; |
1452 return (res.as_m64); | 1473 return (res.as_m64); |
1453 #endif | 1474 #endif |
1454 } | 1475 } |
1455 #endif /* __powerpc64__ */ | |
1456 #endif /* _MMINTRIN_H_INCLUDED */ | 1476 #endif /* _MMINTRIN_H_INCLUDED */ |