comparison gcc/config/rs6000/mmintrin.h @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
comparison
equal deleted inserted replaced
131:84e7813d76e9 145:1830386684a0
1 /* Copyright (C) 2002-2018 Free Software Foundation, Inc. 1 /* Copyright (C) 2002-2020 Free Software Foundation, Inc.
2 2
3 This file is part of GCC. 3 This file is part of GCC.
4 4
5 GCC is free software; you can redistribute it and/or modify 5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by 6 it under the terms of the GNU General Public License as published by
110 _m_to_int (__m64 __i) 110 _m_to_int (__m64 __i)
111 { 111 {
112 return _mm_cvtsi64_si32 (__i); 112 return _mm_cvtsi64_si32 (__i);
113 } 113 }
114 114
115 #ifdef __powerpc64__
116 /* Convert I to a __m64 object. */ 115 /* Convert I to a __m64 object. */
117 116
118 /* Intel intrinsic. */ 117 /* Intel intrinsic. */
119 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 118 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 _m_from_int64 (long long __i) 119 _m_from_int64 (long long __i)
171 _mm_packs_pi16 (__m64 __m1, __m64 __m2) 170 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
172 { 171 {
173 __vector signed short vm1; 172 __vector signed short vm1;
174 __vector signed char vresult; 173 __vector signed char vresult;
175 174
176 vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1); 175 vm1 = (__vector signed short) (__vector unsigned long long)
177 vresult = vec_vpkshss (vm1, vm1); 176 #ifdef __LITTLE_ENDIAN__
178 return (__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0); 177 { __m1, __m2 };
178 #else
179 { __m2, __m1 };
180 #endif
181 vresult = vec_packs (vm1, vm1);
182 return (__m64) ((__vector long long) vresult)[0];
179 } 183 }
180 184
181 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 185 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
182 _m_packsswb (__m64 __m1, __m64 __m2) 186 _m_packsswb (__m64 __m1, __m64 __m2)
183 { 187 {
191 _mm_packs_pi32 (__m64 __m1, __m64 __m2) 195 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
192 { 196 {
193 __vector signed int vm1; 197 __vector signed int vm1;
194 __vector signed short vresult; 198 __vector signed short vresult;
195 199
196 vm1 = (__vector signed int)__builtin_pack_vector_int128 (__m2, __m1); 200 vm1 = (__vector signed int) (__vector unsigned long long)
197 vresult = vec_vpkswss (vm1, vm1); 201 #ifdef __LITTLE_ENDIAN__
198 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0)); 202 { __m1, __m2 };
203 #else
204 { __m2, __m1 };
205 #endif
206 vresult = vec_packs (vm1, vm1);
207 return (__m64) ((__vector long long) vresult)[0];
199 } 208 }
200 209
201 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 210 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 _m_packssdw (__m64 __m1, __m64 __m2) 211 _m_packssdw (__m64 __m1, __m64 __m2)
203 { 212 {
208 the result, and the four 16-bit values from M2 into the upper four 8-bit 217 the result, and the four 16-bit values from M2 into the upper four 8-bit
209 values of the result, all with unsigned saturation. */ 218 values of the result, all with unsigned saturation. */
210 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 219 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
211 _mm_packs_pu16 (__m64 __m1, __m64 __m2) 220 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
212 { 221 {
213 __vector signed short vm1; 222 __vector unsigned char r;
214 __vector unsigned char vresult; 223 __vector signed short vm1 = (__vector signed short) (__vector long long)
215 224 #ifdef __LITTLE_ENDIAN__
216 vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1); 225 { __m1, __m2 };
217 vresult = vec_vpkshus (vm1, vm1); 226 #else
218 return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0)); 227 { __m2, __m1 };
228 #endif
229 const __vector signed short __zero = { 0 };
230 __vector __bool short __select = vec_cmplt (vm1, __zero);
231 r = vec_packs ((__vector unsigned short) vm1, (__vector unsigned short) vm1);
232 __vector __bool char packsel = vec_pack (__select, __select);
233 r = vec_sel (r, (const __vector unsigned char) __zero, packsel);
234 return (__m64) ((__vector long long) r)[0];
219 } 235 }
220 236
221 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
222 _m_packuswb (__m64 __m1, __m64 __m2) 238 _m_packuswb (__m64 __m1, __m64 __m2)
223 { 239 {
234 __vector unsigned char a, b, c; 250 __vector unsigned char a, b, c;
235 251
236 a = (__vector unsigned char)vec_splats (__m1); 252 a = (__vector unsigned char)vec_splats (__m1);
237 b = (__vector unsigned char)vec_splats (__m2); 253 b = (__vector unsigned char)vec_splats (__m2);
238 c = vec_mergel (a, b); 254 c = vec_mergel (a, b);
239 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 255 return (__m64) ((__vector long long) c)[1];
240 #else 256 #else
241 __m64_union m1, m2, res; 257 __m64_union m1, m2, res;
242 258
243 m1.as_m64 = __m1; 259 m1.as_m64 = __m1;
244 m2.as_m64 = __m2; 260 m2.as_m64 = __m2;
315 __vector unsigned char a, b, c; 331 __vector unsigned char a, b, c;
316 332
317 a = (__vector unsigned char)vec_splats (__m1); 333 a = (__vector unsigned char)vec_splats (__m1);
318 b = (__vector unsigned char)vec_splats (__m2); 334 b = (__vector unsigned char)vec_splats (__m2);
319 c = vec_mergel (a, b); 335 c = vec_mergel (a, b);
320 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 1)); 336 return (__m64) ((__vector long long) c)[0];
321 #else 337 #else
322 __m64_union m1, m2, res; 338 __m64_union m1, m2, res;
323 339
324 m1.as_m64 = __m1; 340 m1.as_m64 = __m1;
325 m2.as_m64 = __m2; 341 m2.as_m64 = __m2;
396 __vector signed char a, b, c; 412 __vector signed char a, b, c;
397 413
398 a = (__vector signed char)vec_splats (__m1); 414 a = (__vector signed char)vec_splats (__m1);
399 b = (__vector signed char)vec_splats (__m2); 415 b = (__vector signed char)vec_splats (__m2);
400 c = vec_add (a, b); 416 c = vec_add (a, b);
401 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 417 return (__m64) ((__vector long long) c)[0];
402 #else 418 #else
403 __m64_union m1, m2, res; 419 __m64_union m1, m2, res;
404 420
405 m1.as_m64 = __m1; 421 m1.as_m64 = __m1;
406 m2.as_m64 = __m2; 422 m2.as_m64 = __m2;
432 __vector signed short a, b, c; 448 __vector signed short a, b, c;
433 449
434 a = (__vector signed short)vec_splats (__m1); 450 a = (__vector signed short)vec_splats (__m1);
435 b = (__vector signed short)vec_splats (__m2); 451 b = (__vector signed short)vec_splats (__m2);
436 c = vec_add (a, b); 452 c = vec_add (a, b);
437 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 453 return (__m64) ((__vector long long) c)[0];
438 #else 454 #else
439 __m64_union m1, m2, res; 455 __m64_union m1, m2, res;
440 456
441 m1.as_m64 = __m1; 457 m1.as_m64 = __m1;
442 m2.as_m64 = __m2; 458 m2.as_m64 = __m2;
464 __vector signed int a, b, c; 480 __vector signed int a, b, c;
465 481
466 a = (__vector signed int)vec_splats (__m1); 482 a = (__vector signed int)vec_splats (__m1);
467 b = (__vector signed int)vec_splats (__m2); 483 b = (__vector signed int)vec_splats (__m2);
468 c = vec_add (a, b); 484 c = vec_add (a, b);
469 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 485 return (__m64) ((__vector long long) c)[0];
470 #else 486 #else
471 __m64_union m1, m2, res; 487 __m64_union m1, m2, res;
472 488
473 m1.as_m64 = __m1; 489 m1.as_m64 = __m1;
474 m2.as_m64 = __m2; 490 m2.as_m64 = __m2;
494 __vector signed char a, b, c; 510 __vector signed char a, b, c;
495 511
496 a = (__vector signed char)vec_splats (__m1); 512 a = (__vector signed char)vec_splats (__m1);
497 b = (__vector signed char)vec_splats (__m2); 513 b = (__vector signed char)vec_splats (__m2);
498 c = vec_sub (a, b); 514 c = vec_sub (a, b);
499 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 515 return (__m64) ((__vector long long) c)[0];
500 #else 516 #else
501 __m64_union m1, m2, res; 517 __m64_union m1, m2, res;
502 518
503 m1.as_m64 = __m1; 519 m1.as_m64 = __m1;
504 m2.as_m64 = __m2; 520 m2.as_m64 = __m2;
530 __vector signed short a, b, c; 546 __vector signed short a, b, c;
531 547
532 a = (__vector signed short)vec_splats (__m1); 548 a = (__vector signed short)vec_splats (__m1);
533 b = (__vector signed short)vec_splats (__m2); 549 b = (__vector signed short)vec_splats (__m2);
534 c = vec_sub (a, b); 550 c = vec_sub (a, b);
535 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 551 return (__m64) ((__vector long long) c)[0];
536 #else 552 #else
537 __m64_union m1, m2, res; 553 __m64_union m1, m2, res;
538 554
539 m1.as_m64 = __m1; 555 m1.as_m64 = __m1;
540 m2.as_m64 = __m2; 556 m2.as_m64 = __m2;
562 __vector signed int a, b, c; 578 __vector signed int a, b, c;
563 579
564 a = (__vector signed int)vec_splats (__m1); 580 a = (__vector signed int)vec_splats (__m1);
565 b = (__vector signed int)vec_splats (__m2); 581 b = (__vector signed int)vec_splats (__m2);
566 c = vec_sub (a, b); 582 c = vec_sub (a, b);
567 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 583 return (__m64) ((__vector long long) c)[0];
568 #else 584 #else
569 __m64_union m1, m2, res; 585 __m64_union m1, m2, res;
570 586
571 m1.as_m64 = __m1; 587 m1.as_m64 = __m1;
572 m2.as_m64 = __m2; 588 m2.as_m64 = __m2;
579 } 595 }
580 596
581 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 597 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
582 _m_psubd (__m64 __m1, __m64 __m2) 598 _m_psubd (__m64 __m1, __m64 __m2)
583 { 599 {
584 return _mm_add_pi32 (__m1, __m2); 600 return _mm_sub_pi32 (__m1, __m2);
585 } 601 }
586 602
587 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 603 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
588 _mm_add_si64 (__m64 __m1, __m64 __m2) 604 _mm_add_si64 (__m64 __m1, __m64 __m2)
589 { 605 {
709 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the 725 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
710 test is true and zero if false. */ 726 test is true and zero if false. */
711 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 727 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
712 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 728 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
713 { 729 {
714 #ifdef _ARCH_PWR6 730 #if defined(_ARCH_PWR6) && defined(__powerpc64__)
715 __m64 res; 731 __m64 res;
716 __asm__( 732 __asm__(
717 "cmpb %0,%1,%2;\n" 733 "cmpb %0,%1,%2;\n"
718 : "=r" (res) 734 : "=r" (res)
719 : "r" (__m1), 735 : "r" (__m1),
752 __vector signed char a, b, c; 768 __vector signed char a, b, c;
753 769
754 a = (__vector signed char)vec_splats (__m1); 770 a = (__vector signed char)vec_splats (__m1);
755 b = (__vector signed char)vec_splats (__m2); 771 b = (__vector signed char)vec_splats (__m2);
756 c = (__vector signed char)vec_cmpgt (a, b); 772 c = (__vector signed char)vec_cmpgt (a, b);
757 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 773 return (__m64) ((__vector long long) c)[0];
758 #else 774 #else
759 __m64_union m1, m2, res; 775 __m64_union m1, m2, res;
760 776
761 m1.as_m64 = __m1; 777 m1.as_m64 = __m1;
762 m2.as_m64 = __m2; 778 m2.as_m64 = __m2;
789 __vector signed short a, b, c; 805 __vector signed short a, b, c;
790 806
791 a = (__vector signed short)vec_splats (__m1); 807 a = (__vector signed short)vec_splats (__m1);
792 b = (__vector signed short)vec_splats (__m2); 808 b = (__vector signed short)vec_splats (__m2);
793 c = (__vector signed short)vec_cmpeq (a, b); 809 c = (__vector signed short)vec_cmpeq (a, b);
794 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 810 return (__m64) ((__vector long long) c)[0];
795 #else 811 #else
796 __m64_union m1, m2, res; 812 __m64_union m1, m2, res;
797 813
798 m1.as_m64 = __m1; 814 m1.as_m64 = __m1;
799 m2.as_m64 = __m2; 815 m2.as_m64 = __m2;
820 __vector signed short a, b, c; 836 __vector signed short a, b, c;
821 837
822 a = (__vector signed short)vec_splats (__m1); 838 a = (__vector signed short)vec_splats (__m1);
823 b = (__vector signed short)vec_splats (__m2); 839 b = (__vector signed short)vec_splats (__m2);
824 c = (__vector signed short)vec_cmpgt (a, b); 840 c = (__vector signed short)vec_cmpgt (a, b);
825 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 841 return (__m64) ((__vector long long) c)[0];
826 #else 842 #else
827 __m64_union m1, m2, res; 843 __m64_union m1, m2, res;
828 844
829 m1.as_m64 = __m1; 845 m1.as_m64 = __m1;
830 m2.as_m64 = __m2; 846 m2.as_m64 = __m2;
853 __vector signed int a, b, c; 869 __vector signed int a, b, c;
854 870
855 a = (__vector signed int)vec_splats (__m1); 871 a = (__vector signed int)vec_splats (__m1);
856 b = (__vector signed int)vec_splats (__m2); 872 b = (__vector signed int)vec_splats (__m2);
857 c = (__vector signed int)vec_cmpeq (a, b); 873 c = (__vector signed int)vec_cmpeq (a, b);
858 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 874 return (__m64) ((__vector long long) c)[0];
859 #else 875 #else
860 __m64_union m1, m2, res; 876 __m64_union m1, m2, res;
861 877
862 m1.as_m64 = __m1; 878 m1.as_m64 = __m1;
863 m2.as_m64 = __m2; 879 m2.as_m64 = __m2;
882 __vector signed int a, b, c; 898 __vector signed int a, b, c;
883 899
884 a = (__vector signed int)vec_splats (__m1); 900 a = (__vector signed int)vec_splats (__m1);
885 b = (__vector signed int)vec_splats (__m2); 901 b = (__vector signed int)vec_splats (__m2);
886 c = (__vector signed int)vec_cmpgt (a, b); 902 c = (__vector signed int)vec_cmpgt (a, b);
887 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 903 return (__m64) ((__vector long long) c)[0];
888 #else 904 #else
889 __m64_union m1, m2, res; 905 __m64_union m1, m2, res;
890 906
891 m1.as_m64 = __m1; 907 m1.as_m64 = __m1;
892 m2.as_m64 = __m2; 908 m2.as_m64 = __m2;
913 __vector signed char a, b, c; 929 __vector signed char a, b, c;
914 930
915 a = (__vector signed char)vec_splats (__m1); 931 a = (__vector signed char)vec_splats (__m1);
916 b = (__vector signed char)vec_splats (__m2); 932 b = (__vector signed char)vec_splats (__m2);
917 c = vec_adds (a, b); 933 c = vec_adds (a, b);
918 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 934 return (__m64) ((__vector long long) c)[0];
919 } 935 }
920 936
921 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 937 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922 _m_paddsb (__m64 __m1, __m64 __m2) 938 _m_paddsb (__m64 __m1, __m64 __m2)
923 { 939 {
931 __vector signed short a, b, c; 947 __vector signed short a, b, c;
932 948
933 a = (__vector signed short)vec_splats (__m1); 949 a = (__vector signed short)vec_splats (__m1);
934 b = (__vector signed short)vec_splats (__m2); 950 b = (__vector signed short)vec_splats (__m2);
935 c = vec_adds (a, b); 951 c = vec_adds (a, b);
936 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 952 return (__m64) ((__vector long long) c)[0];
937 } 953 }
938 954
939 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 955 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
940 _m_paddsw (__m64 __m1, __m64 __m2) 956 _m_paddsw (__m64 __m1, __m64 __m2)
941 { 957 {
949 __vector unsigned char a, b, c; 965 __vector unsigned char a, b, c;
950 966
951 a = (__vector unsigned char)vec_splats (__m1); 967 a = (__vector unsigned char)vec_splats (__m1);
952 b = (__vector unsigned char)vec_splats (__m2); 968 b = (__vector unsigned char)vec_splats (__m2);
953 c = vec_adds (a, b); 969 c = vec_adds (a, b);
954 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 970 return (__m64) ((__vector long long) c)[0];
955 } 971 }
956 972
957 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 973 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
958 _m_paddusb (__m64 __m1, __m64 __m2) 974 _m_paddusb (__m64 __m1, __m64 __m2)
959 { 975 {
968 __vector unsigned short a, b, c; 984 __vector unsigned short a, b, c;
969 985
970 a = (__vector unsigned short)vec_splats (__m1); 986 a = (__vector unsigned short)vec_splats (__m1);
971 b = (__vector unsigned short)vec_splats (__m2); 987 b = (__vector unsigned short)vec_splats (__m2);
972 c = vec_adds (a, b); 988 c = vec_adds (a, b);
973 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 989 return (__m64) ((__vector long long) c)[0];
974 } 990 }
975 991
976 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 992 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
977 _m_paddusw (__m64 __m1, __m64 __m2) 993 _m_paddusw (__m64 __m1, __m64 __m2)
978 { 994 {
987 __vector signed char a, b, c; 1003 __vector signed char a, b, c;
988 1004
989 a = (__vector signed char)vec_splats (__m1); 1005 a = (__vector signed char)vec_splats (__m1);
990 b = (__vector signed char)vec_splats (__m2); 1006 b = (__vector signed char)vec_splats (__m2);
991 c = vec_subs (a, b); 1007 c = vec_subs (a, b);
992 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1008 return (__m64) ((__vector long long) c)[0];
993 } 1009 }
994 1010
995 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1011 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
996 _m_psubsb (__m64 __m1, __m64 __m2) 1012 _m_psubsb (__m64 __m1, __m64 __m2)
997 { 1013 {
1006 __vector signed short a, b, c; 1022 __vector signed short a, b, c;
1007 1023
1008 a = (__vector signed short)vec_splats (__m1); 1024 a = (__vector signed short)vec_splats (__m1);
1009 b = (__vector signed short)vec_splats (__m2); 1025 b = (__vector signed short)vec_splats (__m2);
1010 c = vec_subs (a, b); 1026 c = vec_subs (a, b);
1011 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1027 return (__m64) ((__vector long long) c)[0];
1012 } 1028 }
1013 1029
1014 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1030 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1015 _m_psubsw (__m64 __m1, __m64 __m2) 1031 _m_psubsw (__m64 __m1, __m64 __m2)
1016 { 1032 {
1025 __vector unsigned char a, b, c; 1041 __vector unsigned char a, b, c;
1026 1042
1027 a = (__vector unsigned char)vec_splats (__m1); 1043 a = (__vector unsigned char)vec_splats (__m1);
1028 b = (__vector unsigned char)vec_splats (__m2); 1044 b = (__vector unsigned char)vec_splats (__m2);
1029 c = vec_subs (a, b); 1045 c = vec_subs (a, b);
1030 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1046 return (__m64) ((__vector long long) c)[0];
1031 } 1047 }
1032 1048
1033 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1049 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1034 _m_psubusb (__m64 __m1, __m64 __m2) 1050 _m_psubusb (__m64 __m1, __m64 __m2)
1035 { 1051 {
1044 __vector unsigned short a, b, c; 1060 __vector unsigned short a, b, c;
1045 1061
1046 a = (__vector unsigned short)vec_splats (__m1); 1062 a = (__vector unsigned short)vec_splats (__m1);
1047 b = (__vector unsigned short)vec_splats (__m2); 1063 b = (__vector unsigned short)vec_splats (__m2);
1048 c = vec_subs (a, b); 1064 c = vec_subs (a, b);
1049 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1065 return (__m64) ((__vector long long) c)[0];
1050 } 1066 }
1051 1067
1052 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1068 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1053 _m_psubusw (__m64 __m1, __m64 __m2) 1069 _m_psubusw (__m64 __m1, __m64 __m2)
1054 { 1070 {
1066 __vector signed int zero = {0, 0, 0, 0}; 1082 __vector signed int zero = {0, 0, 0, 0};
1067 1083
1068 a = (__vector signed short)vec_splats (__m1); 1084 a = (__vector signed short)vec_splats (__m1);
1069 b = (__vector signed short)vec_splats (__m2); 1085 b = (__vector signed short)vec_splats (__m2);
1070 c = vec_vmsumshm (a, b, zero); 1086 c = vec_vmsumshm (a, b, zero);
1071 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1087 return (__m64) ((__vector long long) c)[0];
1072 } 1088 }
1073 1089
1074 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1090 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1075 _m_pmaddwd (__m64 __m1, __m64 __m2) 1091 _m_pmaddwd (__m64 __m1, __m64 __m2)
1076 { 1092 {
1083 { 1099 {
1084 __vector signed short a, b; 1100 __vector signed short a, b;
1085 __vector signed short c; 1101 __vector signed short c;
1086 __vector signed int w0, w1; 1102 __vector signed int w0, w1;
1087 __vector unsigned char xform1 = { 1103 __vector unsigned char xform1 = {
1104 #ifdef __LITTLE_ENDIAN__
1088 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 1105 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
1089 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F 1106 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1107 #else
1108 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
1109 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
1110 #endif
1090 }; 1111 };
1091 1112
1092 a = (__vector signed short)vec_splats (__m1); 1113 a = (__vector signed short)vec_splats (__m1);
1093 b = (__vector signed short)vec_splats (__m2); 1114 b = (__vector signed short)vec_splats (__m2);
1094 1115
1095 w0 = vec_vmulesh (a, b); 1116 w0 = vec_vmulesh (a, b);
1096 w1 = vec_vmulosh (a, b); 1117 w1 = vec_vmulosh (a, b);
1097 c = (__vector signed short)vec_perm (w0, w1, xform1); 1118 c = (__vector signed short)vec_perm (w0, w1, xform1);
1098 1119
1099 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1120 return (__m64) ((__vector long long) c)[0];
1100 } 1121 }
1101 1122
1102 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1123 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1103 _m_pmulhw (__m64 __m1, __m64 __m2) 1124 _m_pmulhw (__m64 __m1, __m64 __m2)
1104 { 1125 {
1113 __vector signed short a, b, c; 1134 __vector signed short a, b, c;
1114 1135
1115 a = (__vector signed short)vec_splats (__m1); 1136 a = (__vector signed short)vec_splats (__m1);
1116 b = (__vector signed short)vec_splats (__m2); 1137 b = (__vector signed short)vec_splats (__m2);
1117 c = a * b; 1138 c = a * b;
1118 return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0)); 1139 return (__m64) ((__vector long long) c)[0];
1119 } 1140 }
1120 1141
1121 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1142 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1122 _m_pmullw (__m64 __m1, __m64 __m2) 1143 _m_pmullw (__m64 __m1, __m64 __m2)
1123 { 1144 {
1134 if (__count <= 15) 1155 if (__count <= 15)
1135 { 1156 {
1136 m = (__vector signed short)vec_splats (__m); 1157 m = (__vector signed short)vec_splats (__m);
1137 c = (__vector unsigned short)vec_splats ((unsigned short)__count); 1158 c = (__vector unsigned short)vec_splats ((unsigned short)__count);
1138 r = vec_sl (m, (__vector unsigned short)c); 1159 r = vec_sl (m, (__vector unsigned short)c);
1139 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); 1160 return (__m64) ((__vector long long) r)[0];
1140 } 1161 }
1141 else 1162 else
1142 return (0); 1163 return (0);
1143 } 1164 }
1144 1165
1203 if (__count <= 15) 1224 if (__count <= 15)
1204 { 1225 {
1205 m = (__vector signed short)vec_splats (__m); 1226 m = (__vector signed short)vec_splats (__m);
1206 c = (__vector unsigned short)vec_splats ((unsigned short)__count); 1227 c = (__vector unsigned short)vec_splats ((unsigned short)__count);
1207 r = vec_sra (m, (__vector unsigned short)c); 1228 r = vec_sra (m, (__vector unsigned short)c);
1208 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); 1229 return (__m64) ((__vector long long) r)[0];
1209 } 1230 }
1210 else 1231 else
1211 return (0); 1232 return (0);
1212 } 1233 }
1213 1234
1272 if (__count <= 15) 1293 if (__count <= 15)
1273 { 1294 {
1274 m = (__vector unsigned short)vec_splats (__m); 1295 m = (__vector unsigned short)vec_splats (__m);
1275 c = (__vector unsigned short)vec_splats ((unsigned short)__count); 1296 c = (__vector unsigned short)vec_splats ((unsigned short)__count);
1276 r = vec_sr (m, (__vector unsigned short)c); 1297 r = vec_sr (m, (__vector unsigned short)c);
1277 return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0)); 1298 return (__m64) ((__vector long long) r)[0];
1278 } 1299 }
1279 else 1300 else
1280 return (0); 1301 return (0);
1281 } 1302 }
1282 1303
1415 { 1436 {
1416 #if _ARCH_PWR9 1437 #if _ARCH_PWR9
1417 __vector signed short w; 1438 __vector signed short w;
1418 1439
1419 w = (__vector signed short)vec_splats (__w); 1440 w = (__vector signed short)vec_splats (__w);
1420 return (__builtin_unpack_vector_int128 ((__vector __int128)w, 0)); 1441 return (__m64) ((__vector long long) w)[0];
1421 #else 1442 #else
1422 __m64_union res; 1443 __m64_union res;
1423 1444
1424 res.as_short[0] = __w; 1445 res.as_short[0] = __w;
1425 res.as_short[1] = __w; 1446 res.as_short[1] = __w;
1435 { 1456 {
1436 #if _ARCH_PWR8 1457 #if _ARCH_PWR8
1437 __vector signed char b; 1458 __vector signed char b;
1438 1459
1439 b = (__vector signed char)vec_splats (__b); 1460 b = (__vector signed char)vec_splats (__b);
1440 return (__builtin_unpack_vector_int128 ((__vector __int128)b, 0)); 1461 return (__m64) ((__vector long long) b)[0];
1441 #else 1462 #else
1442 __m64_union res; 1463 __m64_union res;
1443 1464
1444 res.as_char[0] = __b; 1465 res.as_char[0] = __b;
1445 res.as_char[1] = __b; 1466 res.as_char[1] = __b;
1450 res.as_char[6] = __b; 1471 res.as_char[6] = __b;
1451 res.as_char[7] = __b; 1472 res.as_char[7] = __b;
1452 return (res.as_m64); 1473 return (res.as_m64);
1453 #endif 1474 #endif
1454 } 1475 }
1455 #endif /* __powerpc64__ */
1456 #endif /* _MMINTRIN_H_INCLUDED */ 1476 #endif /* _MMINTRIN_H_INCLUDED */