comparison gcc/config/rs6000/si2vmx.h @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Cell BEA specific SPU intrinsics to PPU/VMX intrinsics
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
3
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
7 any later version.
8
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 Under Section 7 of GPL version 3, you are granted additional
15 permissions described in the GCC Runtime Library Exception, version
16 3.1, as published by the Free Software Foundation.
17
18 You should have received a copy of the GNU General Public License and
19 a copy of the GCC Runtime Library Exception along with this program;
20 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
21 <http://www.gnu.org/licenses/>. */
22
23 #ifndef _SI2VMX_H_
24 #define _SI2VMX_H_ 1
25
26 #ifndef __SPU__
27
28 #include <stdlib.h>
29 #include <vec_types.h>
30
31
32 /* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics.
33 * Users can override the action by defining it prior to including this
34 * header file.
35 */
36 #ifndef SPU_HALT_ACTION
37 #define SPU_HALT_ACTION abort()
38 #endif
39
40 /* Specify a default stop action for the spu_stop intrinsic.
41 * Users can override the action by defining it prior to including this
42 * header file.
43 */
44 #ifndef SPU_STOP_ACTION
45 #define SPU_STOP_ACTION abort()
46 #endif
47
48
49 /* Specify a default action for unsupported intrinsic.
50 * Users can override the action by defining it prior to including this
51 * header file.
52 */
53 #ifndef SPU_UNSUPPORTED_ACTION
54 #define SPU_UNSUPPORTED_ACTION abort()
55 #endif
56
57
58 /* Casting intrinsics - from scalar to quadword
59 */
60
61 static __inline qword si_from_uchar(unsigned char c) {
62 union {
63 qword q;
64 unsigned char c[16];
65 } x;
66 x.c[3] = c;
67 return (x.q);
68 }
69
70 static __inline qword si_from_char(signed char c) {
71 union {
72 qword q;
73 signed char c[16];
74 } x;
75 x.c[3] = c;
76 return (x.q);
77 }
78
79 static __inline qword si_from_ushort(unsigned short s) {
80 union {
81 qword q;
82 unsigned short s[8];
83 } x;
84 x.s[1] = s;
85 return (x.q);
86 }
87
88 static __inline qword si_from_short(short s) {
89 union {
90 qword q;
91 short s[8];
92 } x;
93 x.s[1] = s;
94 return (x.q);
95 }
96
97
98 static __inline qword si_from_uint(unsigned int i) {
99 union {
100 qword q;
101 unsigned int i[4];
102 } x;
103 x.i[0] = i;
104 return (x.q);
105 }
106
107 static __inline qword si_from_int(int i) {
108 union {
109 qword q;
110 int i[4];
111 } x;
112 x.i[0] = i;
113 return (x.q);
114 }
115
116 static __inline qword si_from_ullong(unsigned long long l) {
117 union {
118 qword q;
119 unsigned long long l[2];
120 } x;
121 x.l[0] = l;
122 return (x.q);
123 }
124
125 static __inline qword si_from_llong(long long l) {
126 union {
127 qword q;
128 long long l[2];
129 } x;
130 x.l[0] = l;
131 return (x.q);
132 }
133
134 static __inline qword si_from_float(float f) {
135 union {
136 qword q;
137 float f[4];
138 } x;
139 x.f[0] = f;
140 return (x.q);
141 }
142
143 static __inline qword si_from_double(double d) {
144 union {
145 qword q;
146 double d[2];
147 } x;
148 x.d[0] = d;
149 return (x.q);
150 }
151
152 static __inline qword si_from_ptr(void *ptr) {
153 union {
154 qword q;
155 void *p;
156 } x;
157 x.p = ptr;
158 return (x.q);
159 }
160
161
162 /* Casting intrinsics - from quadword to scalar
163 */
164 static __inline unsigned char si_to_uchar(qword q) {
165 union {
166 qword q;
167 unsigned char c[16];
168 } x;
169 x.q = q;
170 return (x.c[3]);
171 }
172
173 static __inline signed char si_to_char(qword q) {
174 union {
175 qword q;
176 signed char c[16];
177 } x;
178 x.q = q;
179 return (x.c[3]);
180 }
181
182 static __inline unsigned short si_to_ushort(qword q) {
183 union {
184 qword q;
185 unsigned short s[8];
186 } x;
187 x.q = q;
188 return (x.s[1]);
189 }
190
191 static __inline short si_to_short(qword q) {
192 union {
193 qword q;
194 short s[8];
195 } x;
196 x.q = q;
197 return (x.s[1]);
198 }
199
200 static __inline unsigned int si_to_uint(qword q) {
201 union {
202 qword q;
203 unsigned int i[4];
204 } x;
205 x.q = q;
206 return (x.i[0]);
207 }
208
209 static __inline int si_to_int(qword q) {
210 union {
211 qword q;
212 int i[4];
213 } x;
214 x.q = q;
215 return (x.i[0]);
216 }
217
218 static __inline unsigned long long si_to_ullong(qword q) {
219 union {
220 qword q;
221 unsigned long long l[2];
222 } x;
223 x.q = q;
224 return (x.l[0]);
225 }
226
227 static __inline long long si_to_llong(qword q) {
228 union {
229 qword q;
230 long long l[2];
231 } x;
232 x.q = q;
233 return (x.l[0]);
234 }
235
236 static __inline float si_to_float(qword q) {
237 union {
238 qword q;
239 float f[4];
240 } x;
241 x.q = q;
242 return (x.f[0]);
243 }
244
245 static __inline double si_to_double(qword q) {
246 union {
247 qword q;
248 double d[2];
249 } x;
250 x.q = q;
251 return (x.d[0]);
252 }
253
254 static __inline void * si_to_ptr(qword q) {
255 union {
256 qword q;
257 void *p;
258 } x;
259 x.q = q;
260 return (x.p);
261 }
262
263
264 /* Absolute difference
265 */
266 static __inline qword si_absdb(qword a, qword b)
267 {
268 vec_uchar16 ac, bc, dc;
269
270 ac = (vec_uchar16)(a);
271 bc = (vec_uchar16)(b);
272 dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc));
273
274 return ((qword)(dc));
275 }
276
/* Add intrinsics
 */
/* si_a: element-wise 32-bit word add.  */
#define si_a(_a, _b)		((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b))))

/* si_ah: element-wise 16-bit halfword add.  */
#define si_ah(_a, _b)		((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b))))
282
283 static __inline qword si_ai(qword a, int b)
284 {
285 return ((qword)(vec_add((vec_int4)(a),
286 vec_splat((vec_int4)(si_from_int(b)), 0))));
287 }
288
289
290 static __inline qword si_ahi(qword a, short b)
291 {
292 return ((qword)(vec_add((vec_short8)(a),
293 vec_splat((vec_short8)(si_from_short(b)), 1))));
294 }
295
296
/* si_fa: element-wise single-precision float add.  */
#define si_fa(_a, _b)	((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b))))
298
299
300 static __inline qword si_dfa(qword a, qword b)
301 {
302 union {
303 vec_double2 v;
304 double d[2];
305 } ad, bd, dd;
306
307 ad.v = (vec_double2)(a);
308 bd.v = (vec_double2)(b);
309 dd.d[0] = ad.d[0] + bd.d[0];
310 dd.d[1] = ad.d[1] + bd.d[1];
311
312 return ((qword)(dd.v));
313 }
314
/* Add word extended
 */
/* Word-wise a + b + (least-significant bit of c).  */
#define si_addx(_a, _b, _c)	((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)),	\
						 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))


/* Bit-wise AND
 */
#define si_and(_a, _b)		((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b))))
324
325
326 static __inline qword si_andbi(qword a, signed char b)
327 {
328 return ((qword)(vec_and((vec_char16)(a),
329 vec_splat((vec_char16)(si_from_char(b)), 3))));
330 }
331
332 static __inline qword si_andhi(qword a, signed short b)
333 {
334 return ((qword)(vec_and((vec_short8)(a),
335 vec_splat((vec_short8)(si_from_short(b)), 1))));
336 }
337
338
339 static __inline qword si_andi(qword a, signed int b)
340 {
341 return ((qword)(vec_and((vec_int4)(a),
342 vec_splat((vec_int4)(si_from_int(b)), 0))));
343 }
344
345
/* Bit-wise AND with complement
 */
#define si_andc(_a, _b)		((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b))))


/* Average byte vectors
 */
/* Rounded average of each unsigned byte pair.  */
#define si_avgb(_a, _b)		((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b))))


/* Branch indirect and set link on external data
 */
/* SPU event-driven branches have no PPU/VMX counterpart; these
   expand to nothing.  */
#define si_bisled(_func)	/* not mappable */
#define si_bisledd(_func)	/* not mappable */
#define si_bislede(_func)	/* not mappable */


/* Borrow generate
 */
/* si_bg: per-word borrow input for the subtraction _b - _a.  */
#define si_bg(_a, _b)		((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a))))

/* si_bgx: borrow generate extended, folding in bit 0 of _c.  */
#define si_bgx(_a, _b, _c)	((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)),	\
							vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)),	\
								(vec_uint4)(_c))), vec_splat_u32(1))))
370
371 /* Compare absolute equal
372 */
373 static __inline qword si_fcmeq(qword a, qword b)
374 {
375 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
376
377 return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
378 vec_andc((vec_float4)(b), msb))));
379 }
380
/* Double-precision compare absolute equal, emulated with 32-bit
   integer vector operations.  Each doubleword of the result is all
   ones when the magnitudes |a| and |b| are bit-wise equal and a is
   not a NaN, otherwise all zeros.  */
static __inline qword si_dfcmeq(qword a, qword b)
{
  /* All bits of each doubleword except its sign bit.  */
  vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
  /* High word of +inf; a magnitude above this pattern is a NaN.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
  /* Replicates each doubleword's high-word result into both words.  */
  vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3,  16,17,18,19,  8,9,10,11, 24,25,26,27};

  vec_uint4 biteq;
  vec_uint4 aabs;
  vec_uint4 babs;
  vec_uint4 a_gt;
  vec_uint4 ahi_inf;
  vec_uint4 anan;
  vec_uint4 result;

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes (vec_slo takes the bit count from the last word).  */
  x.i[3] = 4 << 3;

  /* Mask out sign bits */
  aabs = vec_and((vec_uint4)a,sign_mask);
  babs = vec_and((vec_uint4)b,sign_mask);

  /* A) Check for bit equality, store in high word */
  biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs);
  biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));

  /*
    B) Check if a is NaN, store in high word

    B1) If the high word is greater than max_exp (indicates a NaN)
    B2) If the low word is greater than 0
  */
  a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);

  /* B3) Check if the high word is equal to the inf exponent */
  ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);

  /* anan = B1[hi] or (B2[lo] and B3[hi]) */
  anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));

  /* result = A and not B */
  result = vec_andc(biteq, anan);

  /* Promote high words to 64 bits and return */
  return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
}
431
432
433 /* Compare absolute greater than
434 */
435 static __inline qword si_fcmgt(qword a, qword b)
436 {
437 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
438
439 return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb),
440 vec_andc((vec_float4)(b), msb))));
441 }
442
/* Double-precision compare absolute greater than, emulated with
   32-bit integer vector operations.  Each doubleword of the result
   is all ones when |a| > |b| and neither input is a NaN.  */
static __inline qword si_dfcmgt(qword a, qword b)
{
  /* Replicates each doubleword's high word into both words.  */
  vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3,  8,9,10,11, 8,9,10,11 };
  /* High word of +inf; a magnitude above this pattern is a NaN.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
  /* All bits of each doubleword except its sign bit.  */
  vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes (vec_slo takes the bit count from the last word).  */
  x.i[3] = 4 << 3;

  // absolute value of a,b
  vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
  vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);

  // check if a is nan
  vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
  vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
  a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
  a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);

  // check if b is nan
  vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
  vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
  b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
  b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);

  // A) Check if the exponents are different
  vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs);

  // B) Check if high word equal, and low word greater
  vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs);
  vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs);
  vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));

  // If either A or B is true, return true (unless NaNs detected)
  vec_uint4 r = vec_or(gt_hi, eqgt);

  // splat the high words of the comparison step
  r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);

  // correct for NaNs in input
  return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
}
490
491
492 /* Compare equal
493 */
494 static __inline qword si_ceqb(qword a, qword b)
495 {
496 return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b))));
497 }
498
499 static __inline qword si_ceqh(qword a, qword b)
500 {
501 return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b))));
502 }
503
504 static __inline qword si_ceq(qword a, qword b)
505 {
506 return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b))));
507 }
508
509 static __inline qword si_fceq(qword a, qword b)
510 {
511 return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b))));
512 }
513
514 static __inline qword si_ceqbi(qword a, signed char b)
515 {
516 return ((qword)(vec_cmpeq((vec_char16)(a),
517 vec_splat((vec_char16)(si_from_char(b)), 3))));
518 }
519
520 static __inline qword si_ceqhi(qword a, signed short b)
521 {
522 return ((qword)(vec_cmpeq((vec_short8)(a),
523 vec_splat((vec_short8)(si_from_short(b)), 1))));
524 }
525
526 static __inline qword si_ceqi(qword a, signed int b)
527 {
528 return ((qword)(vec_cmpeq((vec_int4)(a),
529 vec_splat((vec_int4)(si_from_int(b)), 0))));
530 }
531
/* Double-precision compare equal, emulated with 32-bit integer
   vector operations.  Each doubleword of the result is all ones when
   a == b (including the 0 == -0 case) and a is not a NaN.  */
static __inline qword si_dfceq(qword a, qword b)
{
  /* All bits of each doubleword except its sign bit.  */
  vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
  /* High word of +inf; a magnitude above this pattern is a NaN.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
  /* Replicates each doubleword's high-word result into both words.  */
  vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3,  16,17,18,19,  8,9,10,11, 24,25,26,27};

  vec_uint4 biteq;
  vec_uint4 aabs;
  vec_uint4 babs;
  vec_uint4 a_gt;
  vec_uint4 ahi_inf;
  vec_uint4 anan;
  vec_uint4 iszero;
  vec_uint4 result;

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes (vec_slo takes the bit count from the last word).  */
  x.i[3] = 4 << 3;

  /* A) Check for bit equality, store in high word */
  biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b);
  biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));

  /* Mask out sign bits */
  aabs = vec_and((vec_uint4)a,sign_mask);
  babs = vec_and((vec_uint4)b,sign_mask);

  /*
    B) Check if a is NaN, store in high word

    B1) If the high word is greater than max_exp (indicates a NaN)
    B2) If the low word is greater than 0
  */
  a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);

  /* B3) Check if the high word is equal to the inf exponent */
  ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);

  /* anan = B1[hi] or (B2[lo] and B3[hi]) */
  anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));

  /* C) Check for 0 = -0 special case */
  iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0));
  iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));

  /* result = (A or C) and not B */
  result = vec_or(biteq,iszero);
  result = vec_andc(result, anan);

  /* Promote high words to 64 bits and return */
  return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
}
588
589
590 /* Compare greater than
591 */
592 static __inline qword si_cgtb(qword a, qword b)
593 {
594 return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b))));
595 }
596
597 static __inline qword si_cgth(qword a, qword b)
598 {
599 return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b))));
600 }
601
602 static __inline qword si_cgt(qword a, qword b)
603 {
604 return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b))));
605 }
606
607 static __inline qword si_clgtb(qword a, qword b)
608 {
609 return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b))));
610 }
611
612 static __inline qword si_clgth(qword a, qword b)
613 {
614 return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b))));
615 }
616
617 static __inline qword si_clgt(qword a, qword b)
618 {
619 return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b))));
620 }
621
622 static __inline qword si_fcgt(qword a, qword b)
623 {
624 return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b))));
625 }
626
/* Double-precision (signed) compare greater than, emulated with
   32-bit integer vector operations.  Negative magnitudes are
   two's-complemented so that a single signed-high / unsigned-low
   integer comparison orders the doubles correctly; NaN operands
   force a zero result.  */
static __inline qword si_dfcgt(qword a, qword b)
{
  /* Replicates each doubleword's high word into both words.  */
  vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3,  8,9,10,11, 8,9,10,11 };
  /* Moves each doubleword's low-word borrow up to the high word;
     the 192 entries select zero via the pat/select trick below.  */
  vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 };
  /* High word of +inf; a magnitude above this pattern is a NaN.  */
  vec_uint4  nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
  /* All bits of each doubleword except its sign bit.  */
  vec_uint4  sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes (vec_slo takes the bit count from the last word).  */
  x.i[3] = 4 << 3;

  // absolute value of a,b
  vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
  vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);

  // check if a is nan
  vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
  vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
  a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
  a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);

  // check if b is nan
  vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
  vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
  b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
  b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);

  // sign of a
  vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
  asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi);

  // sign of b
  vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
  bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi);

  // negative a  (two's complement of the 64-bit magnitude)
  vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs);
  vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7)));
  abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
  vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1)));

  // pick the one we want
  vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel);

  // negative b  (two's complement of the 64-bit magnitude)
  vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs);
  bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
  vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1)));

  // pick the one we want
  vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel);

  // A) Check if the exponents are different
  vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval);

  // B) Check if high word equal, and low word greater
  vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval);
  vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval);
  vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));

  // If either A or B is true, return true (unless NaNs detected)
  vec_uint4 r = vec_or(gt_hi, eqgt);

  // splat the high words of the comparison step
  r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);

  // correct for NaNs in input
  return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
}
700
701 static __inline qword si_cgtbi(qword a, signed char b)
702 {
703 return ((qword)(vec_cmpgt((vec_char16)(a),
704 vec_splat((vec_char16)(si_from_char(b)), 3))));
705 }
706
707 static __inline qword si_cgthi(qword a, signed short b)
708 {
709 return ((qword)(vec_cmpgt((vec_short8)(a),
710 vec_splat((vec_short8)(si_from_short(b)), 1))));
711 }
712
713 static __inline qword si_cgti(qword a, signed int b)
714 {
715 return ((qword)(vec_cmpgt((vec_int4)(a),
716 vec_splat((vec_int4)(si_from_int(b)), 0))));
717 }
718
719 static __inline qword si_clgtbi(qword a, unsigned char b)
720 {
721 return ((qword)(vec_cmpgt((vec_uchar16)(a),
722 vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
723 }
724
725 static __inline qword si_clgthi(qword a, unsigned short b)
726 {
727 return ((qword)(vec_cmpgt((vec_ushort8)(a),
728 vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
729 }
730
731 static __inline qword si_clgti(qword a, unsigned int b)
732 {
733 return ((qword)(vec_cmpgt((vec_uint4)(a),
734 vec_splat((vec_uint4)(si_from_uint(b)), 0))));
735 }
736
/* Double-precision test special value.  B selects which classes to
   test, per the bit tests below: 0x40 NaN, 0x20 +inf, 0x10 -inf,
   0x08 +0, 0x04 -0, 0x02 +denorm, 0x01 -denorm.  Each doubleword of
   the result is all ones when the corresponding element of A belongs
   to any selected class.  */
static __inline qword si_dftsv(qword a, char b)
{
  /* Replicates each doubleword's high word into both words.  */
  vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3,  8,9,10,11, 8,9,10,11 };
  /* All bits of each doubleword except its sign bit.  */
  vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
  vec_uint4 result = (vec_uint4){0};
  /* Sign of each element, splatted across its doubleword.  */
  vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
  sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi);
  vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask);

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes (vec_slo takes the bit count from the last word).  */
  x.i[3] = 4 << 3;

  /* Nan or +inf or -inf  */
  if (b & 0x70)
    {
      vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
      vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
      /* NaN  */
      if (b & 0x40)
	{
	  vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
	  a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
	  a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
	  result = vec_or(result, a_nan);
	}
      /* inf  */
      if (b & 0x30)
	{
	  a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf);
	  a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
	  /* +inf  */
	  if (b & 0x20)
	    result = vec_or(vec_andc(a_inf, sign), result);
	  /* -inf  */
	  if (b & 0x10)
	    result = vec_or(vec_and(a_inf, sign), result);
	}
    }
  /* 0 or denorm  */
  if (b & 0xF)
    {
      vec_uint4 iszero =(vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0));
      iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
      /* denorm  */
      if (b & 0x3)
	{
	  vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF};
	  /* Denormal: magnitude at or below the mantissa mask, but not zero.  */
	  vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero);
	  isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi);
	  /* +denorm  */
	  if (b & 0x2)
	    result = vec_or(vec_andc(isdenorm, sign), result);
	  /* -denorm  */
	  if (b & 0x1)
	    result = vec_or(vec_and(isdenorm, sign), result);
	}
      /* 0  */
      if (b & 0xC)
	{
	  iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi);
	  /* +0  */
	  if (b & 0x8)
	    result = vec_or(vec_andc(iszero, sign), result);
	  /* -0  */
	  if (b & 0x4)
	    result = vec_or(vec_and(iszero, sign), result);
	}
    }
  return ((qword)result);
}
812
813
/* Carry generate
 */
/* si_cg: per-word carry out of a + b (0 or 1).  */
#define si_cg(_a, _b)		((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b))))

/* si_cgx: carry out of a + b + (c & 1), i.e. extended add.  */
#define si_cgx(_a, _b, _c)	((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)),	\
						vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)),	\
							 vec_and((vec_uint4)(_c), vec_splat_u32(1))))))
821
822
823 /* Count ones for bytes
824 */
825 static __inline qword si_cntb(qword a)
826 {
827 vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
828 vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 };
829 vec_uchar16 av;
830
831 av = (vec_uchar16)(a);
832
833 return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av),
834 vec_perm(nib_cnt, nib_cnt, vec_sr (av, four)))));
835 }
836
/* Count leading zeros (per 32-bit word)
 */
/* Count the leading zeros of each 32-bit word.  Leading zeros are
   counted per nibble via a lookup table, then merged up through
   byte, halfword and word granularity: a partial count only
   propagates past a boundary when everything above it was zero
   (the vec_cmpeq tests below).  */
static __inline qword si_clz(qword a)
{
  vec_uchar16 av;
  vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3;
  vec_uchar16 four    = vec_splat_u8(4);
  /* Leading-zero count of each possible nibble value 0..15.  */
  vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
  vec_uchar16 eight   = vec_splat_u8(8);
  vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
  vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24};

  av = (vec_uchar16)(a);

  cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four));
  cnt_lo = vec_perm(nib_cnt, nib_cnt, av);

  /* Per-byte count: add the low-nibble count only when the high
     nibble was all zeros (count == 4).  */
  cnt = vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four)));

  tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight));
  tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen));
  tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour));

  /* Fold byte counts into a per-word count in the top byte.  */
  cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight)));
  cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen)));
  cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour)));

  return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour))));
}
866
/* Convert to float
 */
/* Convert each (unsigned/signed) 32-bit element to float, scaled
   by 2**(-_b).  */
#define si_cuflt(_a, _b)	((qword)(vec_ctf((vec_uint4)(_a), _b)))
#define si_csflt(_a, _b)	((qword)(vec_ctf((vec_int4)(_a), _b)))

/* Convert to signed int
 */
#define si_cflts(_a, _b)	((qword)(vec_cts((vec_float4)(_a), _b)))

/* Convert to unsigned int
 */
#define si_cfltu(_a, _b)	((qword)(vec_ctu((vec_float4)(_a), _b)))

/* Synchronize
 */
/* SPU channel/pipeline synchronization has no VMX equivalent;
   these expand to nothing.  */
#define si_dsync()		/* do nothing */
#define si_sync()		/* do nothing */
#define si_syncc()		/* do nothing */
885
886
887 /* Equivalence
888 */
889 static __inline qword si_eqv(qword a, qword b)
890 {
891 vec_uchar16 d;
892
893 d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b));
894 return ((qword)(vec_nor(d, d)));
895 }
896
897 /* Extend
898 */
899 static __inline qword si_xsbh(qword a)
900 {
901 vec_char16 av;
902
903 av = (vec_char16)(a);
904 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
905 0, 0, 0, 0, 0, 0, 0, 0})))));
906 }
907
908 static __inline qword si_xshw(qword a)
909 {
910 vec_short8 av;
911
912 av = (vec_short8)(a);
913 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
914 10,11,14,15,
915 0, 0, 0, 0,
916 0, 0, 0, 0})))));
917 }
918
919 static __inline qword si_xswd(qword a)
920 {
921 vec_int4 av;
922
923 av = (vec_int4)(a);
924 return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
925 ((vec_uchar16){20, 21, 22, 23,
926 4, 5, 6, 7,
927 28, 29, 30, 31,
928 12, 13, 14, 15}))));
929 }
930
931 static __inline qword si_fesd(qword a)
932 {
933 union {
934 double d[2];
935 vec_double2 vd;
936 } out;
937 union {
938 float f[4];
939 vec_float4 vf;
940 } in;
941
942 in.vf = (vec_float4)(a);
943 out.d[0] = (double)(in.f[0]);
944 out.d[1] = (double)(in.f[2]);
945 return ((qword)(out.vd));
946 }
947
/* Gather
 */
/* si_gbb: gather the least-significant bit of each of the 16 bytes
   into a 16-bit value left in the preferred word slot.  */
static __inline qword si_gbb(qword a)
{
  vec_uchar16 bits;
  vec_uint4 bytes;

  /* Move each byte's LSB to a distinct bit position, then sum the
     bytes of each half to fold them into two 8-bit groups.  */
  bits = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0,
									   7, 6, 5, 4, 3, 2, 1, 0}));
  bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0}));

  /* Assemble the two partial sums into the result halfword.  */
  return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0,
							0, 0, 0, 0, 0, 0, 0, 0}))));
}


/* si_gbh: gather the least-significant bit of each of the 8
   halfwords into the preferred word slot.  */
static __inline qword si_gbh(qword a)
{
  vec_ushort8 bits;
  vec_uint4 bytes;

  bits = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0}));

  bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0});

  /* vec_sums leaves the total in the last word; rotate it to word 0.  */
  return ((qword)(vec_sld(bytes, bytes, 12)));
}

/* si_gb: gather the least-significant bit of each of the 4 words
   into the preferred word slot.  */
static __inline qword si_gb(qword a)
{
  vec_uint4 bits;
  vec_uint4 bytes;

  bits = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0}));
  bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0}));
  /* vec_sums leaves the total in the last word; rotate it to word 0.  */
  return ((qword)(vec_sld(bytes, bytes, 12)));
}
985
986
987 /* Compare and halt
988 */
989 static __inline void si_heq(qword a, qword b)
990 {
991 union {
992 vector unsigned int v;
993 unsigned int i[4];
994 } aa, bb;
995
996 aa.v = (vector unsigned int)(a);
997 bb.v = (vector unsigned int)(b);
998
999 if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; };
1000 }
1001
1002 static __inline void si_heqi(qword a, unsigned int b)
1003 {
1004 union {
1005 vector unsigned int v;
1006 unsigned int i[4];
1007 } aa;
1008
1009 aa.v = (vector unsigned int)(a);
1010
1011 if (aa.i[0] == b) { SPU_HALT_ACTION; };
1012 }
1013
1014 static __inline void si_hgt(qword a, qword b)
1015 {
1016 union {
1017 vector signed int v;
1018 signed int i[4];
1019 } aa, bb;
1020
1021 aa.v = (vector signed int)(a);
1022 bb.v = (vector signed int)(b);
1023
1024 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
1025 }
1026
1027 static __inline void si_hgti(qword a, signed int b)
1028 {
1029 union {
1030 vector signed int v;
1031 signed int i[4];
1032 } aa;
1033
1034 aa.v = (vector signed int)(a);
1035
1036 if (aa.i[0] > b) { SPU_HALT_ACTION; };
1037 }
1038
1039 static __inline void si_hlgt(qword a, qword b)
1040 {
1041 union {
1042 vector unsigned int v;
1043 unsigned int i[4];
1044 } aa, bb;
1045
1046 aa.v = (vector unsigned int)(a);
1047 bb.v = (vector unsigned int)(b);
1048
1049 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
1050 }
1051
1052 static __inline void si_hlgti(qword a, unsigned int b)
1053 {
1054 union {
1055 vector unsigned int v;
1056 unsigned int i[4];
1057 } aa;
1058
1059 aa.v = (vector unsigned int)(a);
1060
1061 if (aa.i[0] > b) { SPU_HALT_ACTION; };
1062 }
1063
1064
1065 /* Multiply and Add
1066 */
1067 static __inline qword si_mpya(qword a, qword b, qword c)
1068 {
1069 return ((qword)(vec_msum(vec_and((vec_short8)(a),
1070 ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
1071 (vec_short8)(b), (vec_int4)(c))));
1072 }
1073
1074 static __inline qword si_fma(qword a, qword b, qword c)
1075 {
1076 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
1077 }
1078
1079 static __inline qword si_dfma(qword a, qword b, qword c)
1080 {
1081 union {
1082 vec_double2 v;
1083 double d[2];
1084 } aa, bb, cc, dd;
1085
1086 aa.v = (vec_double2)(a);
1087 bb.v = (vec_double2)(b);
1088 cc.v = (vec_double2)(c);
1089 dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0];
1090 dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1];
1091 return ((qword)(dd.v));
1092 }
1093
/* Form Mask
 */
/* si_fsmbi: form a byte mask from a 16-bit immediate.  */
#define si_fsmbi(_a)	si_fsmb(si_from_int(_a))

/* si_fsmb: each of the 16 bits in the preferred halfword slot
   (bytes 2-3) expands to a full byte of ones or zeros, MSB first.  */
static __inline qword si_fsmb(qword a)
{
  vec_char16 mask;
  vec_ushort8 in;

  in = (vec_ushort8)(a);
  /* Replicate byte 2 over the first eight bytes, byte 3 over the
     last eight.  */
  mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2,
						      3, 3, 3, 3, 3, 3, 3, 3})));
  /* The left shift moves each byte's selected bit into the sign
     position; the arithmetic right shift replicates it across the
     whole byte.  */
  return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7,
						      0, 1, 2, 3, 4, 5, 6, 7})),
			  vec_splat_u8(7))));
}


/* si_fsmh: expand the 8 bits of preferred-slot byte 3 into 8
   halfword masks, MSB first.  */
static __inline qword si_fsmh(qword a)
{
  vec_uchar16 in;
  vec_short8 mask;

  in = (vec_uchar16)(a);
  mask = (vec_short8)(vec_splat(in, 3));
  /* Shift bit (7-i) into the sign position of halfword i, then
     replicate it with an arithmetic right shift.  */
  return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
			  vec_splat_u16(15))));
}

/* si_fsm: expand the low 4 bits of preferred-slot byte 3 into 4
   word masks, MSB first.  */
static __inline qword si_fsm(qword a)
{
  vec_uchar16 in;
  vec_int4 mask;

  in = (vec_uchar16)(a);
  mask = (vec_int4)(vec_splat(in, 3));
  /* Shift bit (3-i) into the sign position of word i, then
     replicate it with an arithmetic right shift.  */
  return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})),
			  ((vec_uint4){31,31,31,31}))));
}
1133
/* Move from/to registers
 */
/* The SPU floating-point status/control register and special-purpose
   registers have no VMX equivalent: reads return zero, writes are
   silently discarded.  */
#define si_fscrrd()		((qword)((vec_uint4){0}))
#define si_fscrwr(_a)

#define si_mfspr(_reg)		((qword)((vec_uint4){0}))
#define si_mtspr(_reg, _a)
1141
/* Multiply High High Add
 */
/* Multiply the even (high-order, in big-endian element order) signed
   halfwords of A and B and add the words of C.  */
static __inline qword si_mpyhha(qword a, qword b, qword c)
{
  return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c))));
}

/* Unsigned variant of si_mpyhha.  */
static __inline qword si_mpyhhau(qword a, qword b, qword c)
{
  return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c))));
}
1153
/* Multiply Subtract
 */
/* a * b - c, computed as a * b + (0 - c) since VMX only provides a
   fused multiply-add.  */
static __inline qword si_fms(qword a, qword b, qword c)
{
  return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
                           vec_sub(((vec_float4){0.0f}), (vec_float4)(c)))));
}
1161
1162 static __inline qword si_dfms(qword a, qword b, qword c)
1163 {
1164 union {
1165 vec_double2 v;
1166 double d[2];
1167 } aa, bb, cc, dd;
1168
1169 aa.v = (vec_double2)(a);
1170 bb.v = (vec_double2)(b);
1171 cc.v = (vec_double2)(c);
1172 dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0];
1173 dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1];
1174 return ((qword)(dd.v));
1175 }
1176
/* Multiply
 */
/* Single-precision multiply via vec_madd with a zero addend.  */
static __inline qword si_fm(qword a, qword b)
{
  return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f}))));
}
1183
1184 static __inline qword si_dfm(qword a, qword b)
1185 {
1186 union {
1187 vec_double2 v;
1188 double d[2];
1189 } aa, bb, dd;
1190
1191 aa.v = (vec_double2)(a);
1192 bb.v = (vec_double2)(b);
1193 dd.d[0] = aa.d[0] * bb.d[0];
1194 dd.d[1] = aa.d[1] * bb.d[1];
1195 return ((qword)(dd.v));
1196 }
1197
/* Multiply High
 */
/* Per word: (high halfword of A * low halfword of B) << 16.  B is
   shifted left 16 so its low halfword lands in the even slot that
   vec_mule multiplies against A's high halfword.  */
static __inline qword si_mpyh(qword a, qword b)
{
  vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16};

  return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen)));
}
1206
1207
/* Multiply High High
 */
/* Multiply the even (high-order) signed halfwords of A and B into
   full 32-bit words.  */
static __inline qword si_mpyhh(qword a, qword b)
{
  return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b))));
}

/* Unsigned variant of si_mpyhh.  */
static __inline qword si_mpyhhu(qword a, qword b)
{
  return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b))));
}
1219
/* Multiply Odd
 */
/* 16 x 16 -> 32 bit multiply of the odd (low-order) signed halfwords.  */
static __inline qword si_mpy(qword a, qword b)
{
  return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b))));
}

/* Unsigned variant of si_mpy.  */
static __inline qword si_mpyu(qword a, qword b)
{
  return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b))));
}

/* Multiply the odd halfwords of A by the immediate B, splatted into
   every odd halfword slot.  */
static __inline qword si_mpyi(qword a, short b)
{
  return ((qword)(vec_mulo((vec_short8)(a),
                           vec_splat((vec_short8)(si_from_short(b)), 1))));
}

/* Unsigned immediate variant of si_mpyi.  */
static __inline qword si_mpyui(qword a, unsigned short b)
{
  return ((qword)(vec_mulo((vec_ushort8)(a),
                           vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}
1243
/* Multiply and Shift Right
 */
/* Per word: (odd halfword of A * odd halfword of B) >> 16, with the
   arithmetic shift preserving the product's sign.  */
static __inline qword si_mpys(qword a, qword b)
{
  return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16}))));
}
1250
1251 /* Nand
1252 */
1253 static __inline qword si_nand(qword a, qword b)
1254 {
1255 vec_uchar16 d;
1256
1257 d = vec_and((vec_uchar16)(a), (vec_uchar16)(b));
1258 return ((qword)(vec_nor(d, d)));
1259 }
1260
1261 /* Negative Multiply Add
1262 */
1263 static __inline qword si_dfnma(qword a, qword b, qword c)
1264 {
1265 union {
1266 vec_double2 v;
1267 double d[2];
1268 } aa, bb, cc, dd;
1269
1270 aa.v = (vec_double2)(a);
1271 bb.v = (vec_double2)(b);
1272 cc.v = (vec_double2)(c);
1273 dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0];
1274 dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1];
1275 return ((qword)(dd.v));
1276 }
1277
/* Negative Multiply and Subtract
 */
/* c - a * b per element; maps directly onto vec_nmsub.  */
static __inline qword si_fnms(qword a, qword b, qword c)
{
  return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
}
1284
1285 static __inline qword si_dfnms(qword a, qword b, qword c)
1286 {
1287 union {
1288 vec_double2 v;
1289 double d[2];
1290 } aa, bb, cc, dd;
1291
1292 aa.v = (vec_double2)(a);
1293 bb.v = (vec_double2)(b);
1294 cc.v = (vec_double2)(c);
1295 dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0];
1296 dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1];
1297 return ((qword)(dd.v));
1298 }
1299
/* Nor
 */
static __inline qword si_nor(qword a, qword b)
{
  return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b))));
}

/* Or
 */
static __inline qword si_or(qword a, qword b)
{
  return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b))));
}

/* OR each byte of A with the immediate B.  */
static __inline qword si_orbi(qword a, unsigned char b)
{
  return ((qword)(vec_or((vec_uchar16)(a),
                         vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
}

/* OR each halfword of A with the immediate B.  */
static __inline qword si_orhi(qword a, unsigned short b)
{
  return ((qword)(vec_or((vec_ushort8)(a),
                         vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}

/* OR each word of A with the immediate B.  */
static __inline qword si_ori(qword a, unsigned int b)
{
  return ((qword)(vec_or((vec_uint4)(a),
                         vec_splat((vec_uint4)(si_from_uint(b)), 0))));
}

/* Or Complement
 */
/* a | ~b; the complement is formed with vec_nor(b, b).  */
static __inline qword si_orc(qword a, qword b)
{
  return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b)))));
}
1338
1339
/* Or Across
 */
/* OR the four word elements of A together; the result is left in the
   preferred (leftmost) word slot with the remaining slots zeroed.  */
static __inline qword si_orx(qword a)
{
  vec_uchar16 tmp;
  tmp = (vec_uchar16)(a);
  tmp = vec_or(tmp, vec_sld(tmp, tmp, 8));
  tmp = vec_or(tmp, vec_sld(tmp, tmp, 4));
  return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00,
                                              0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}))));
}
1351
1352
/* Estimates
 */
/* Reciprocal estimate.  */
static __inline qword si_frest(qword a)
{
  return ((qword)(vec_re((vec_float4)(a))));
}

/* Reciprocal square-root estimate.  */
static __inline qword si_frsqest(qword a)
{
  return ((qword)(vec_rsqrte((vec_float4)(a))));
}

/* The SPU interpolation step has no VMX equivalent; the estimate is
   returned unchanged.  */
#define si_fi(_a, _d)		(_d)

/* Channel Read and Write
 */
#define si_rdch(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
#define si_rchcnt(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
#define si_wrch(_channel, _a)		/* not mappable */
1372
/* Rotate Left
 */
static __inline qword si_roth(qword a, qword b)
{
  return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b))));
}

static __inline qword si_rot(qword a, qword b)
{
  return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b))));
}

/* Rotate each halfword of A left by the immediate B.  */
static __inline qword si_rothi(qword a, int b)
{
  return ((qword)(vec_rl((vec_ushort8)(a),
                         vec_splat((vec_ushort8)(si_from_int(b)), 1))));
}

/* Rotate each word of A left by the immediate B.  */
static __inline qword si_roti(qword a, int b)
{
  return ((qword)(vec_rl((vec_uint4)(a),
                         vec_splat((vec_uint4)(si_from_int(b)), 0))));
}
1396
/* Rotate Left with Mask
 */
/* SPU rotm shifts right by the negated count.  VMX vec_sr honours only
   the low log2(width) bits of the count, so a mask derived from the
   "count >= width" bit (bit 4 of the negated halfword count, moved to
   the sign position and replicated) forces the element to zero when
   the whole value is shifted out.  */
static __inline qword si_rothm(qword a, qword b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
}

/* Word variant: bit 5 of the negated count selects the zero mask.  */
static __inline qword si_rotm(qword a, qword b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
}

/* Immediate halfword variant of si_rothm.  */
static __inline qword si_rothmi(qword a, int b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
}

/* Immediate word variant of si_rotm.  */
static __inline qword si_rotmi(qword a, int b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
}
1438
1439
/* Rotate Left Algebraic with Mask
 */
/* Arithmetic right shift by the negated count.  A count of the element
   width or more must fill the element with the sign bit, so the
   "count >= width" mask is ORed into the count, saturating vec_sra's
   effective shift at width - 1.  */
static __inline qword si_rotmah(qword a, qword b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}

/* Word variant of si_rotmah.  */
static __inline qword si_rotma(qword a, qword b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}


/* Immediate halfword variant of si_rotmah.  */
static __inline qword si_rotmahi(qword a, int b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}

/* Immediate word variant of si_rotma.  */
static __inline qword si_rotmai(qword a, int b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}
1482
1483
1484 /* Rotate Left Quadword by Bytes with Mask
1485 */
1486 static __inline qword si_rotqmbyi(qword a, int count)
1487 {
1488 union {
1489 vec_uchar16 v;
1490 int i[4];
1491 } x;
1492 vec_uchar16 mask;
1493
1494 count = 0 - count;
1495 x.i[3] = count << 3;
1496 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
1497
1498 return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
1499 }
1500
1501
/* Register-count variant: shift A right by the byte count in COUNT,
   zeroing the result when 16 or more bytes are shifted out.  */
static __inline qword si_rotqmby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  /* Negate the byte count and convert it to a bit count.  */
  x.i[0] = cnt = (0 - x.i[0]) << 3;

  /* Replicate the count byte so vec_sro sees it in element 15.  */
  x.v = vec_splat(x.v, 3);
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
1519
1520
1521 /* Rotate Left Quadword by Bytes
1522 */
1523 static __inline qword si_rotqbyi(qword a, int count)
1524 {
1525 union {
1526 vec_uchar16 v;
1527 int i[4];
1528 } left, right;
1529
1530 count <<= 3;
1531 left.i[3] = count;
1532 right.i[3] = 0 - count;
1533 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v))));
1534 }
1535
/* Register-count variant: the byte count in element 3 of COUNT is
   converted to bits; the complementary right-shift count is its two's
   complement, supplying the wrap-around bytes.  */
static __inline qword si_rotqby(qword a, qword count)
{
  vec_uchar16 left, right;

  left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
  right = vec_sub(vec_splat_u8(0), left);
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
}
1544
/* Rotate Left Quadword by Bytes Bit Count
 */
/* COUNT holds a bit count; vec_slo/vec_sro take their byte shift from
   bits 1-4 of the last byte (i.e. count / 8).  The complementary right
   count is (7 - count), whose byte field works out to 16 - count/8
   modulo 16 — TODO(review): confirmed only by inspection, verify
   against the SPU rotqbybi definition.  */
static __inline qword si_rotqbybi(qword a, qword count)
{
  vec_uchar16 left, right;

  left = vec_splat((vec_uchar16)(count), 3);
  right = vec_sub(vec_splat_u8(7), left);
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
}
1555
1556
/* Rotate Left Quadword by Bits
 */
/* Rotate A left by COUNT bits (modulo 8): OR the left bit-shift with
   A's top byte, which vec_sro(…, 120 bits) wraps to the bottom and the
   word shift right by (8 - count) aligns so the top COUNT bits become
   the low bits of the result.  */
static __inline qword si_rotqbii(qword a, int count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3);
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
                           (vec_uint4)vec_sub(vec_splat_u8(8), x)));
  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}

/* Register-count variant of si_rotqbii; the count is byte 3 of COUNT,
   taken modulo 8.  */
static __inline qword si_rotqbi(qword a, qword count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7));
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
                           (vec_uint4)vec_sub(vec_splat_u8(8), x)));

  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}
1583
1584
/* Rotate Left Quadword and Mask by Bits
 */
/* Shift A right by COUNT bits (rotqmbii takes a negated count; the
   negation here makes vec_srl's 3-bit shift equal the right-shift
   amount).  */
static __inline qword si_rotqmbii(qword a, int count)
{
  return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3))));
}

/* Register-count variant of si_rotqmbii.  */
static __inline qword si_rotqmbi(qword a, qword count)
{
  return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), vec_splat((vec_uchar16)(count), 3)))));
}
1596
1597
/* Rotate Left Quadword and Mask by Bytes with Bit Count
 */
/* Shift A right by the byte-aligned portion of the (negated) bit count
   in COUNT, zeroing the result when a whole quadword or more is shifted
   out.  */
static __inline qword si_rotqmbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  /* Negate the byte-aligned bit count (the low 3 bits are dropped).  */
  x.i[0] = cnt = 0 - (x.i[0] & ~7);
  x.v = vec_splat(x.v, 3);
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
1616
1617
1618
1619
1620 /* Round Double to Float
1621 */
1622 static __inline qword si_frds(qword a)
1623 {
1624 union {
1625 vec_float4 v;
1626 float f[4];
1627 } d;
1628 union {
1629 vec_double2 v;
1630 double d[2];
1631 } in;
1632
1633 in.v = (vec_double2)(a);
1634 d.v = (vec_float4){0.0f};
1635 d.f[0] = (float)in.d[0];
1636 d.f[2] = (float)in.d[1];
1637
1638 return ((qword)(d.v));
1639 }
1640
/* Select Bits
 */
/* Bitwise select: each bit of C chooses B (1) or A (0).  */
static __inline qword si_selb(qword a, qword b, qword c)
{
  return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c))));
}
1647
1648
/* Shuffle Bytes
 */
/* vec_perm supplies the plain byte-select behaviour of SPU shufb; the
   special pattern codes (0b10xxxxxx -> 0x00, 0b110xxxxx -> 0xFF,
   0b111xxxxx -> 0x80) are handled by a second permute: for pattern
   bytes with the MSB set, PAT becomes pattern >> 3 (values 16-31),
   which indexes the constant vector of 0x00/0xFF/0x80 fill bytes
   instead of the shuffled data.  */
static __inline qword si_shufb(qword a, qword b, qword pattern)
{
  vec_uchar16 pat;

  pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
                vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)),
                vec_sra((vec_uchar16)(pattern), vec_splat_u8(7)));
  return ((qword)(vec_perm(vec_perm(a, b, pattern),
                           ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0,
                                          0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),
                           pat)));
}
1663
1664
/* Shift Left
 */
/* vec_sl honours only the low log2(width) bits of the count, so a mask
   built from the "count >= 16" bit zeroes halfwords whose entire value
   is shifted out.  */
static __inline qword si_shlh(qword a, qword b)
{
  vec_ushort8 mask;

  mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask)));
}

/* Word variant: the "count >= 32" bit selects the zero mask.  */
static __inline qword si_shl(qword a, qword b)
{
  vec_uint4 mask;

  mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask)));
}
1682
1683
/* Immediate halfword variant of si_shlh.  */
static __inline qword si_shlhi(qword a, unsigned int b)
{
  vec_ushort8 mask;
  vec_ushort8 bv;

  bv = vec_splat((vec_ushort8)(si_from_int(b)), 1);
  mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask)));
}

/* Immediate word variant of si_shl.  */
static __inline qword si_shli(qword a, unsigned int b)
{
  vec_uint4 bv;
  vec_uint4 mask;

  bv = vec_splat((vec_uint4)(si_from_uint(b)), 0);
  mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask)));
}
1703
1704
/* Shift Left Quadword
 */
/* Shift A left by COUNT bits; vec_sll takes the 3-bit count from the
   splatted byte 3.  */
static __inline qword si_shlqbii(qword a, unsigned int count)
{
  vec_uchar16 x;

  x = vec_splat((vec_uchar16)(si_from_uint(count)), 3);
  return ((qword)(vec_sll((vec_uchar16)(a), x)));
}

/* Register-count variant of si_shlqbii.  */
static __inline qword si_shlqbi(qword a, qword count)
{
  vec_uchar16 x;

  x = vec_splat((vec_uchar16)(count), 3);
  return ((qword)(vec_sll((vec_uchar16)(a), x)));
}
1722
1723
1724 /* Shift Left Quadword by Bytes
1725 */
1726 static __inline qword si_shlqbyi(qword a, unsigned int count)
1727 {
1728 union {
1729 vec_uchar16 v;
1730 int i[4];
1731 } x;
1732 vec_uchar16 mask;
1733
1734 x.i[3] = count << 3;
1735 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
1736 return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
1737 }
1738
/* Register-count variant: shift A left by the byte count in element 3
   of COUNT, zeroing the result when 16 or more bytes are shifted out
   (bit 7 of the bit count).  */
static __inline qword si_shlqby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    unsigned int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
1753
/* Shift Left Quadword by Bytes with Bit Count
 */
/* COUNT already holds a bit count; vec_slo uses its byte portion.  The
   result is zeroed when a whole quadword or more is shifted out.  */
static __inline qword si_shlqbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  x.v = vec_splat((vec_uchar16)(count), 3);
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
1770
1771
/* Stop and Signal
 */
/* The stop type and signal data cannot be conveyed to the PPU; both
   forms invoke the user-configurable SPU_STOP_ACTION.  */
#define si_stop(_type)		SPU_STOP_ACTION
#define si_stopd(a, b, c)	SPU_STOP_ACTION
1776
1777
/* Subtract
 */
/* Note the operand order: SPU "subtract from" computes b - a.  */
static __inline qword si_sfh(qword a, qword b)
{
  return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a))));
}

static __inline qword si_sf(qword a, qword b)
{
  return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a))));
}

/* Floating-point subtract computes a - b.  */
static __inline qword si_fs(qword a, qword b)
{
  return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b))));
}

/* Double-precision subtract (a - b), in scalar code (no VMX doubles).  */
static __inline qword si_dfs(qword a, qword b)
{
  union {
    vec_double2 v;
    double d[2];
  } aa, bb, dd;

  aa.v = (vec_double2)(a);
  bb.v = (vec_double2)(b);
  dd.d[0] = aa.d[0] - bb.d[0];
  dd.d[1] = aa.d[1] - bb.d[1];
  return ((qword)(dd.v));
}

/* Subtract each halfword of A from the immediate B (b - a).  */
static __inline qword si_sfhi(qword a, short b)
{
  return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1),
                          (vec_short8)(a))));
}

/* Subtract each word of A from the immediate B (b - a).  */
static __inline qword si_sfi(qword a, int b)
{
  return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0),
                          (vec_int4)(a))));
}
1820
/* Subtract word extended
 */
/* _b + ~_a + (low bit of _c): the borrow form of extended subtract.  */
#define si_sfx(_a, _b, _c)	((qword)(vec_add(vec_add((vec_uint4)(_b),		\
							 vec_nor((vec_uint4)(_a), (vec_uint4)(_a))), \
						 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
1826
1827
/* Sum Bytes into Shorts
 */
/* Sum the four bytes of each word of A and B into 16-bit fields; the
   permute interleaves the B sums (even halfword slots) with the A sums
   (odd slots) to match SPU sumb element placement.  */
static __inline qword si_sumb(qword a, qword b)
{
  vec_uint4 zero = (vec_uint4){0};
  vec_ushort8 sum_a, sum_b;

  sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero);
  sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero);

  return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19, 2, 3, 22, 23, 6, 7,
                                                        26, 27, 10, 11, 30, 31, 14, 15}))));
}
1841
/* Exclusive OR
 */
static __inline qword si_xor(qword a, qword b)
{
  return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b))));
}

/* XOR each byte of A with the immediate B.  */
static __inline qword si_xorbi(qword a, unsigned char b)
{
  return ((qword)(vec_xor((vec_uchar16)(a),
                          vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
}

/* XOR each halfword of A with the immediate B.  */
static __inline qword si_xorhi(qword a, unsigned short b)
{
  return ((qword)(vec_xor((vec_ushort8)(a),
                          vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}

/* XOR each word of A with the immediate B.  */
static __inline qword si_xori(qword a, unsigned int b)
{
  return ((qword)(vec_xor((vec_uint4)(a),
                          vec_splat((vec_uint4)(si_from_uint(b)), 0))));
}
1866
1867
/* Generate Controls for Sub-Quadword Insertion
 */
/* Each generator builds a shuffle pattern that, used with si_shufb,
   inserts a scalar at address (a + imm) within a quadword: start from
   the identity pattern selecting the second operand (bytes 0x10-0x1F)
   and overwrite the addressed element with bytes selecting the scalar
   from the first operand's preferred slot.  */
static __inline qword si_cbd(qword a, int imm)
{
  union {
    vec_uint4 v;
    unsigned char c[16];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03;
  return ((qword)(shmask.v));
}

/* Doubleword insertion controls.  */
static __inline qword si_cdd(qword a, int imm)
{
  union {
    vec_uint4 v;
    unsigned long long ll[2];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL;
  return ((qword)(shmask.v));
}

/* Halfword insertion controls.  */
static __inline qword si_chd(qword a, int imm)
{
  union {
    vec_uint4 v;
    unsigned short s[8];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203;
  return ((qword)(shmask.v));
}

/* Word insertion controls.  */
static __inline qword si_cwd(qword a, int imm)
{
  union {
    vec_uint4 v;
    unsigned int i[4];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203;
  return ((qword)(shmask.v));
}
1917
/* Register-offset (x-form) insertion control generators: identical to
   the d-form versions above, but the insertion address is a + b.  */
static __inline qword si_cbx(qword a, qword b)
{
  union {
    vec_uint4 v;
    unsigned char c[16];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03;
  return ((qword)(shmask.v));
}


/* Doubleword insertion controls (register offset).  */
static __inline qword si_cdx(qword a, qword b)
{
  union {
    vec_uint4 v;
    unsigned long long ll[2];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL;
  return ((qword)(shmask.v));
}

/* Halfword insertion controls (register offset).  */
static __inline qword si_chx(qword a, qword b)
{
  union {
    vec_uint4 v;
    unsigned short s[8];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203;
  return ((qword)(shmask.v));
}

/* Word insertion controls (register offset).  */
static __inline qword si_cwx(qword a, qword b)
{
  union {
    vec_uint4 v;
    unsigned int i[4];
  } shmask;

  shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
  shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203;
  return ((qword)(shmask.v));
}
1966
1967
/* Constant Formation
 */
/* Immediate load: sign-extend IMM to 32 bits and splat to all words.  */
static __inline qword si_il(signed short imm)
{
  return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0)));
}


/* Immediate load address: splat the 32-bit value to all words.  */
static __inline qword si_ila(unsigned int imm)
{
  return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0)));
}

/* Immediate load halfword: splat IMM to all eight halfwords.  */
static __inline qword si_ilh(signed short imm)
{
  return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1)));
}

/* Immediate load halfword upper: (IMM << 16) splatted to all words.  */
static __inline qword si_ilhu(signed short imm)
{
  return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0)));
}

/* Immediate OR halfword lower: OR IMM into each word of A.  */
static __inline qword si_iohl(qword a, unsigned short imm)
{
  return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0))));
}
1995
/* No Operation
 */
/* SPU scheduling no-ops have no PPU counterpart; both expand to nothing.  */
#define si_lnop()		/* do nothing */
#define si_nop()		/* do nothing */
2000
2001
/* Memory Load and Store
 */
/* Load the aligned quadword at absolute address IMM; vec_ld adds the
   offset and pointer operands and ignores the low 4 address bits.  */
static __inline qword si_lqa(unsigned int imm)
{
  return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
}

/* Load from base A (rounded down to 16 bytes) plus byte offset IMM.
   The base is passed as vec_ld's offset and IMM as its pointer; vec_ld
   sums the two, so the effective address is the same.  */
static __inline qword si_lqd(qword a, unsigned int imm)
{
  return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm))));
}

/* The PC-relative form degenerates to an absolute load on the PPU.  */
static __inline qword si_lqr(unsigned int imm)
{
  return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
}

/* Indexed load from address a + b.  */
static __inline qword si_lqx(qword a, qword b)
{
  return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0))));
}

/* Store A to the absolute address IMM.  */
static __inline void si_stqa(qword a, unsigned int imm)
{
  vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
}

/* Store A to base B (rounded down to 16 bytes) plus byte offset IMM.  */
static __inline void si_stqd(qword a, qword b, unsigned int imm)
{
  vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm));
}

/* The PC-relative form degenerates to an absolute store on the PPU.  */
static __inline void si_stqr(qword a, unsigned int imm)
{
  vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
}

/* Indexed store of A to address b + c.  */
static __inline void si_stqx(qword a, qword b, qword c)
{
  vec_st((vec_uchar16)(a),
	 si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))),
	 (vector unsigned char *)(0));
}
2045
2046 #endif /* !__SPU__ */
2047 #endif /* !_SI2VMX_H_ */
2048