0
|
1 /* Cell BEA specific SPU intrinsics to PPU/VMX intrinsics
|
|
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is free software; you can redistribute it and/or modify it under
|
|
5 the terms of the GNU General Public License as published by the Free
|
|
6 Software Foundation; either version 3 of the License, or (at your option)
|
|
7 any later version.
|
|
8
|
|
9 This file is distributed in the hope that it will be useful, but WITHOUT
|
|
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 for more details.
|
|
13
|
|
14 Under Section 7 of GPL version 3, you are granted additional
|
|
15 permissions described in the GCC Runtime Library Exception, version
|
|
16 3.1, as published by the Free Software Foundation.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License and
|
|
19 a copy of the GCC Runtime Library Exception along with this program;
|
|
20 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
21 <http://www.gnu.org/licenses/>. */
|
|
22
|
|
23 #ifndef _SI2VMX_H_
|
|
24 #define _SI2VMX_H_ 1
|
|
25
|
|
26 #ifndef __SPU__
|
|
27
|
|
28 #include <stdlib.h>
|
|
29 #include <vec_types.h>
|
|
30
|
|
31
|
|
32 /* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics.
|
|
33 * Users can override the action by defining it prior to including this
|
|
34 * header file.
|
|
35 */
|
|
36 #ifndef SPU_HALT_ACTION
|
|
37 #define SPU_HALT_ACTION abort()
|
|
38 #endif
|
|
39
|
|
40 /* Specify a default stop action for the spu_stop intrinsic.
|
|
41 * Users can override the action by defining it prior to including this
|
|
42 * header file.
|
|
43 */
|
|
44 #ifndef SPU_STOP_ACTION
|
|
45 #define SPU_STOP_ACTION abort()
|
|
46 #endif
|
|
47
|
|
48
|
|
49 /* Specify a default action for unsupported intrinsic.
|
|
50 * Users can override the action by defining it prior to including this
|
|
51 * header file.
|
|
52 */
|
|
53 #ifndef SPU_UNSUPPORTED_ACTION
|
|
54 #define SPU_UNSUPPORTED_ACTION abort()
|
|
55 #endif
|
|
56
|
|
57
|
|
58 /* Casting intrinsics - from scalar to quadword
|
|
59 */
|
|
60
|
|
61 static __inline qword si_from_uchar(unsigned char c) {
|
|
62 union {
|
|
63 qword q;
|
|
64 unsigned char c[16];
|
|
65 } x;
|
|
66 x.c[3] = c;
|
|
67 return (x.q);
|
|
68 }
|
|
69
|
|
70 static __inline qword si_from_char(signed char c) {
|
|
71 union {
|
|
72 qword q;
|
|
73 signed char c[16];
|
|
74 } x;
|
|
75 x.c[3] = c;
|
|
76 return (x.q);
|
|
77 }
|
|
78
|
|
79 static __inline qword si_from_ushort(unsigned short s) {
|
|
80 union {
|
|
81 qword q;
|
|
82 unsigned short s[8];
|
|
83 } x;
|
|
84 x.s[1] = s;
|
|
85 return (x.q);
|
|
86 }
|
|
87
|
|
88 static __inline qword si_from_short(short s) {
|
|
89 union {
|
|
90 qword q;
|
|
91 short s[8];
|
|
92 } x;
|
|
93 x.s[1] = s;
|
|
94 return (x.q);
|
|
95 }
|
|
96
|
|
97
|
|
98 static __inline qword si_from_uint(unsigned int i) {
|
|
99 union {
|
|
100 qword q;
|
|
101 unsigned int i[4];
|
|
102 } x;
|
|
103 x.i[0] = i;
|
|
104 return (x.q);
|
|
105 }
|
|
106
|
|
107 static __inline qword si_from_int(int i) {
|
|
108 union {
|
|
109 qword q;
|
|
110 int i[4];
|
|
111 } x;
|
|
112 x.i[0] = i;
|
|
113 return (x.q);
|
|
114 }
|
|
115
|
|
116 static __inline qword si_from_ullong(unsigned long long l) {
|
|
117 union {
|
|
118 qword q;
|
|
119 unsigned long long l[2];
|
|
120 } x;
|
|
121 x.l[0] = l;
|
|
122 return (x.q);
|
|
123 }
|
|
124
|
|
125 static __inline qword si_from_llong(long long l) {
|
|
126 union {
|
|
127 qword q;
|
|
128 long long l[2];
|
|
129 } x;
|
|
130 x.l[0] = l;
|
|
131 return (x.q);
|
|
132 }
|
|
133
|
|
134 static __inline qword si_from_float(float f) {
|
|
135 union {
|
|
136 qword q;
|
|
137 float f[4];
|
|
138 } x;
|
|
139 x.f[0] = f;
|
|
140 return (x.q);
|
|
141 }
|
|
142
|
|
143 static __inline qword si_from_double(double d) {
|
|
144 union {
|
|
145 qword q;
|
|
146 double d[2];
|
|
147 } x;
|
|
148 x.d[0] = d;
|
|
149 return (x.q);
|
|
150 }
|
|
151
|
|
152 static __inline qword si_from_ptr(void *ptr) {
|
|
153 union {
|
|
154 qword q;
|
|
155 void *p;
|
|
156 } x;
|
|
157 x.p = ptr;
|
|
158 return (x.q);
|
|
159 }
|
|
160
|
|
161
|
|
162 /* Casting intrinsics - from quadword to scalar
|
|
163 */
|
|
164 static __inline unsigned char si_to_uchar(qword q) {
|
|
165 union {
|
|
166 qword q;
|
|
167 unsigned char c[16];
|
|
168 } x;
|
|
169 x.q = q;
|
|
170 return (x.c[3]);
|
|
171 }
|
|
172
|
|
173 static __inline signed char si_to_char(qword q) {
|
|
174 union {
|
|
175 qword q;
|
|
176 signed char c[16];
|
|
177 } x;
|
|
178 x.q = q;
|
|
179 return (x.c[3]);
|
|
180 }
|
|
181
|
|
182 static __inline unsigned short si_to_ushort(qword q) {
|
|
183 union {
|
|
184 qword q;
|
|
185 unsigned short s[8];
|
|
186 } x;
|
|
187 x.q = q;
|
|
188 return (x.s[1]);
|
|
189 }
|
|
190
|
|
191 static __inline short si_to_short(qword q) {
|
|
192 union {
|
|
193 qword q;
|
|
194 short s[8];
|
|
195 } x;
|
|
196 x.q = q;
|
|
197 return (x.s[1]);
|
|
198 }
|
|
199
|
|
200 static __inline unsigned int si_to_uint(qword q) {
|
|
201 union {
|
|
202 qword q;
|
|
203 unsigned int i[4];
|
|
204 } x;
|
|
205 x.q = q;
|
|
206 return (x.i[0]);
|
|
207 }
|
|
208
|
|
209 static __inline int si_to_int(qword q) {
|
|
210 union {
|
|
211 qword q;
|
|
212 int i[4];
|
|
213 } x;
|
|
214 x.q = q;
|
|
215 return (x.i[0]);
|
|
216 }
|
|
217
|
|
218 static __inline unsigned long long si_to_ullong(qword q) {
|
|
219 union {
|
|
220 qword q;
|
|
221 unsigned long long l[2];
|
|
222 } x;
|
|
223 x.q = q;
|
|
224 return (x.l[0]);
|
|
225 }
|
|
226
|
|
227 static __inline long long si_to_llong(qword q) {
|
|
228 union {
|
|
229 qword q;
|
|
230 long long l[2];
|
|
231 } x;
|
|
232 x.q = q;
|
|
233 return (x.l[0]);
|
|
234 }
|
|
235
|
|
236 static __inline float si_to_float(qword q) {
|
|
237 union {
|
|
238 qword q;
|
|
239 float f[4];
|
|
240 } x;
|
|
241 x.q = q;
|
|
242 return (x.f[0]);
|
|
243 }
|
|
244
|
|
245 static __inline double si_to_double(qword q) {
|
|
246 union {
|
|
247 qword q;
|
|
248 double d[2];
|
|
249 } x;
|
|
250 x.q = q;
|
|
251 return (x.d[0]);
|
|
252 }
|
|
253
|
|
254 static __inline void * si_to_ptr(qword q) {
|
|
255 union {
|
|
256 qword q;
|
|
257 void *p;
|
|
258 } x;
|
|
259 x.q = q;
|
|
260 return (x.p);
|
|
261 }
|
|
262
|
|
263
|
|
264 /* Absolute difference
|
|
265 */
|
|
266 static __inline qword si_absdb(qword a, qword b)
|
|
267 {
|
|
268 vec_uchar16 ac, bc, dc;
|
|
269
|
|
270 ac = (vec_uchar16)(a);
|
|
271 bc = (vec_uchar16)(b);
|
|
272 dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc));
|
|
273
|
|
274 return ((qword)(dc));
|
|
275 }
|
|
276
|
|
277 /* Add intrinsics
|
|
278 */
|
|
279 #define si_a(_a, _b) ((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b))))
|
|
280
|
|
281 #define si_ah(_a, _b) ((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b))))
|
|
282
|
|
283 static __inline qword si_ai(qword a, int b)
|
|
284 {
|
|
285 return ((qword)(vec_add((vec_int4)(a),
|
|
286 vec_splat((vec_int4)(si_from_int(b)), 0))));
|
|
287 }
|
|
288
|
|
289
|
|
290 static __inline qword si_ahi(qword a, short b)
|
|
291 {
|
|
292 return ((qword)(vec_add((vec_short8)(a),
|
|
293 vec_splat((vec_short8)(si_from_short(b)), 1))));
|
|
294 }
|
|
295
|
|
296
|
|
297 #define si_fa(_a, _b) ((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b))))
|
|
298
|
|
299
|
|
300 static __inline qword si_dfa(qword a, qword b)
|
|
301 {
|
|
302 union {
|
|
303 vec_double2 v;
|
|
304 double d[2];
|
|
305 } ad, bd, dd;
|
|
306
|
|
307 ad.v = (vec_double2)(a);
|
|
308 bd.v = (vec_double2)(b);
|
|
309 dd.d[0] = ad.d[0] + bd.d[0];
|
|
310 dd.d[1] = ad.d[1] + bd.d[1];
|
|
311
|
|
312 return ((qword)(dd.v));
|
|
313 }
|
|
314
|
|
315 /* Add word extended
|
|
316 */
|
|
317 #define si_addx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \
|
|
318 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
|
|
319
|
|
320
|
|
321 /* Bit-wise AND
|
|
322 */
|
|
323 #define si_and(_a, _b) ((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b))))
|
|
324
|
|
325
|
|
326 static __inline qword si_andbi(qword a, signed char b)
|
|
327 {
|
|
328 return ((qword)(vec_and((vec_char16)(a),
|
|
329 vec_splat((vec_char16)(si_from_char(b)), 3))));
|
|
330 }
|
|
331
|
|
332 static __inline qword si_andhi(qword a, signed short b)
|
|
333 {
|
|
334 return ((qword)(vec_and((vec_short8)(a),
|
|
335 vec_splat((vec_short8)(si_from_short(b)), 1))));
|
|
336 }
|
|
337
|
|
338
|
|
339 static __inline qword si_andi(qword a, signed int b)
|
|
340 {
|
|
341 return ((qword)(vec_and((vec_int4)(a),
|
|
342 vec_splat((vec_int4)(si_from_int(b)), 0))));
|
|
343 }
|
|
344
|
|
345
|
|
346 /* Bit-wise AND with complement
|
|
347 */
|
|
348 #define si_andc(_a, _b) ((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b))))
|
|
349
|
|
350
|
|
351 /* Average byte vectors
|
|
352 */
|
|
353 #define si_avgb(_a, _b) ((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b))))
|
|
354
|
|
355
|
|
356 /* Branch indirect and set link on external data
|
|
357 */
|
|
358 #define si_bisled(_func) /* not mappable */
|
|
359 #define si_bisledd(_func) /* not mappable */
|
|
360 #define si_bislede(_func) /* not mappable */
|
|
361
|
|
362
|
|
363 /* Borrow generate
|
|
364 */
|
|
365 #define si_bg(_a, _b) ((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a))))
|
|
366
|
|
367 #define si_bgx(_a, _b, _c) ((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)), \
|
|
368 vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)), \
|
|
369 (vec_uint4)(_c))), vec_splat_u32(1))))
|
|
370
|
|
371 /* Compare absolute equal
|
|
372 */
|
|
373 static __inline qword si_fcmeq(qword a, qword b)
|
|
374 {
|
|
375 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
|
|
376
|
|
377 return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
|
|
378 vec_andc((vec_float4)(b), msb))));
|
|
379 }
|
|
380
|
|
381 static __inline qword si_dfcmeq(qword a, qword b)
|
|
382 {
|
|
383 vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
|
|
384 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
|
|
385 vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27};
|
|
386
|
|
387 vec_uint4 biteq;
|
|
388 vec_uint4 aabs;
|
|
389 vec_uint4 babs;
|
|
390 vec_uint4 a_gt;
|
|
391 vec_uint4 ahi_inf;
|
|
392 vec_uint4 anan;
|
|
393 vec_uint4 result;
|
|
394
|
|
395 union {
|
|
396 vec_uchar16 v;
|
|
397 int i[4];
|
|
398 } x;
|
|
399
|
|
400 /* Shift 4 bytes */
|
|
401 x.i[3] = 4 << 3;
|
|
402
|
|
403 /* Mask out sign bits */
|
|
404 aabs = vec_and((vec_uint4)a,sign_mask);
|
|
405 babs = vec_and((vec_uint4)b,sign_mask);
|
|
406
|
|
407 /* A) Check for bit equality, store in high word */
|
|
408 biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs);
|
|
409 biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));
|
|
410
|
|
411 /*
|
|
412 B) Check if a is NaN, store in high word
|
|
413
|
|
414 B1) If the high word is greater than max_exp (indicates a NaN)
|
|
415 B2) If the low word is greater than 0
|
|
416 */
|
|
417 a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
|
|
418
|
|
419 /* B3) Check if the high word is equal to the inf exponent */
|
|
420 ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);
|
|
421
|
|
422 /* anan = B1[hi] or (B2[lo] and B3[hi]) */
|
|
423 anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));
|
|
424
|
|
425 /* result = A and not B */
|
|
426 result = vec_andc(biteq, anan);
|
|
427
|
|
428 /* Promote high words to 64 bits and return */
|
|
429 return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
|
|
430 }
|
|
431
|
|
432
|
|
433 /* Compare absolute greater than
|
|
434 */
|
|
435 static __inline qword si_fcmgt(qword a, qword b)
|
|
436 {
|
|
437 vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
|
|
438
|
|
439 return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb),
|
|
440 vec_andc((vec_float4)(b), msb))));
|
|
441 }
|
|
442
|
|
443 static __inline qword si_dfcmgt(qword a, qword b)
|
|
444 {
|
|
445 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
|
446 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
|
|
447 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
|
|
448
|
|
449 union {
|
|
450 vec_uchar16 v;
|
|
451 int i[4];
|
|
452 } x;
|
|
453
|
|
454 /* Shift 4 bytes */
|
|
455 x.i[3] = 4 << 3;
|
|
456
|
|
457 // absolute value of a,b
|
|
458 vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
|
|
459 vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
|
|
460
|
|
461 // check if a is nan
|
|
462 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
|
|
463 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
|
|
464 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
|
|
465 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
|
|
466
|
|
467 // check if b is nan
|
|
468 vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
|
|
469 vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
|
|
470 b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
|
|
471 b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);
|
|
472
|
|
473 // A) Check if the exponents are different
|
|
474 vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs);
|
|
475
|
|
476 // B) Check if high word equal, and low word greater
|
|
477 vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs);
|
|
478 vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs);
|
|
479 vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
|
|
480
|
|
481 // If either A or B is true, return true (unless NaNs detected)
|
|
482 vec_uint4 r = vec_or(gt_hi, eqgt);
|
|
483
|
|
484 // splat the high words of the comparison step
|
|
485 r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);
|
|
486
|
|
487 // correct for NaNs in input
|
|
488 return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
|
|
489 }
|
|
490
|
|
491
|
|
492 /* Compare equal
|
|
493 */
|
|
494 static __inline qword si_ceqb(qword a, qword b)
|
|
495 {
|
|
496 return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b))));
|
|
497 }
|
|
498
|
|
499 static __inline qword si_ceqh(qword a, qword b)
|
|
500 {
|
|
501 return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b))));
|
|
502 }
|
|
503
|
|
504 static __inline qword si_ceq(qword a, qword b)
|
|
505 {
|
|
506 return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b))));
|
|
507 }
|
|
508
|
|
509 static __inline qword si_fceq(qword a, qword b)
|
|
510 {
|
|
511 return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b))));
|
|
512 }
|
|
513
|
|
514 static __inline qword si_ceqbi(qword a, signed char b)
|
|
515 {
|
|
516 return ((qword)(vec_cmpeq((vec_char16)(a),
|
|
517 vec_splat((vec_char16)(si_from_char(b)), 3))));
|
|
518 }
|
|
519
|
|
520 static __inline qword si_ceqhi(qword a, signed short b)
|
|
521 {
|
|
522 return ((qword)(vec_cmpeq((vec_short8)(a),
|
|
523 vec_splat((vec_short8)(si_from_short(b)), 1))));
|
|
524 }
|
|
525
|
|
526 static __inline qword si_ceqi(qword a, signed int b)
|
|
527 {
|
|
528 return ((qword)(vec_cmpeq((vec_int4)(a),
|
|
529 vec_splat((vec_int4)(si_from_int(b)), 0))));
|
|
530 }
|
|
531
|
|
532 static __inline qword si_dfceq(qword a, qword b)
|
|
533 {
|
|
534 vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
|
|
535 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
|
|
536 vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27};
|
|
537
|
|
538 vec_uint4 biteq;
|
|
539 vec_uint4 aabs;
|
|
540 vec_uint4 babs;
|
|
541 vec_uint4 a_gt;
|
|
542 vec_uint4 ahi_inf;
|
|
543 vec_uint4 anan;
|
|
544 vec_uint4 iszero;
|
|
545 vec_uint4 result;
|
|
546
|
|
547 union {
|
|
548 vec_uchar16 v;
|
|
549 int i[4];
|
|
550 } x;
|
|
551
|
|
552 /* Shift 4 bytes */
|
|
553 x.i[3] = 4 << 3;
|
|
554
|
|
555 /* A) Check for bit equality, store in high word */
|
|
556 biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b);
|
|
557 biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));
|
|
558
|
|
559 /* Mask out sign bits */
|
|
560 aabs = vec_and((vec_uint4)a,sign_mask);
|
|
561 babs = vec_and((vec_uint4)b,sign_mask);
|
|
562
|
|
563 /*
|
|
564 B) Check if a is NaN, store in high word
|
|
565
|
|
566 B1) If the high word is greater than max_exp (indicates a NaN)
|
|
567 B2) If the low word is greater than 0
|
|
568 */
|
|
569 a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
|
|
570
|
|
571 /* B3) Check if the high word is equal to the inf exponent */
|
|
572 ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);
|
|
573
|
|
574 /* anan = B1[hi] or (B2[lo] and B3[hi]) */
|
|
575 anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));
|
|
576
|
|
577 /* C) Check for 0 = -0 special case */
|
|
578 iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0));
|
|
579 iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
|
|
580
|
|
581 /* result = (A or C) and not B */
|
|
582 result = vec_or(biteq,iszero);
|
|
583 result = vec_andc(result, anan);
|
|
584
|
|
585 /* Promote high words to 64 bits and return */
|
|
586 return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
|
|
587 }
|
|
588
|
|
589
|
|
590 /* Compare greater than
|
|
591 */
|
|
592 static __inline qword si_cgtb(qword a, qword b)
|
|
593 {
|
|
594 return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b))));
|
|
595 }
|
|
596
|
|
597 static __inline qword si_cgth(qword a, qword b)
|
|
598 {
|
|
599 return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b))));
|
|
600 }
|
|
601
|
|
602 static __inline qword si_cgt(qword a, qword b)
|
|
603 {
|
|
604 return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b))));
|
|
605 }
|
|
606
|
|
607 static __inline qword si_clgtb(qword a, qword b)
|
|
608 {
|
|
609 return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b))));
|
|
610 }
|
|
611
|
|
612 static __inline qword si_clgth(qword a, qword b)
|
|
613 {
|
|
614 return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b))));
|
|
615 }
|
|
616
|
|
617 static __inline qword si_clgt(qword a, qword b)
|
|
618 {
|
|
619 return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b))));
|
|
620 }
|
|
621
|
|
622 static __inline qword si_fcgt(qword a, qword b)
|
|
623 {
|
|
624 return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b))));
|
|
625 }
|
|
626
|
|
627 static __inline qword si_dfcgt(qword a, qword b)
|
|
628 {
|
|
629 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
|
630 vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 };
|
|
631 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
|
|
632 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
|
|
633
|
|
634 union {
|
|
635 vec_uchar16 v;
|
|
636 int i[4];
|
|
637 } x;
|
|
638
|
|
639 /* Shift 4 bytes */
|
|
640 x.i[3] = 4 << 3;
|
|
641
|
|
642 // absolute value of a,b
|
|
643 vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
|
|
644 vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
|
|
645
|
|
646 // check if a is nan
|
|
647 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
|
|
648 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
|
|
649 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
|
|
650 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
|
|
651
|
|
652 // check if b is nan
|
|
653 vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
|
|
654 vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
|
|
655 b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
|
|
656 b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);
|
|
657
|
|
658 // sign of a
|
|
659 vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
|
|
660 asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi);
|
|
661
|
|
662 // sign of b
|
|
663 vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
|
|
664 bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi);
|
|
665
|
|
666 // negative a
|
|
667 vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs);
|
|
668 vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7)));
|
|
669 abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
|
|
670 vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1)));
|
|
671
|
|
672 // pick the one we want
|
|
673 vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel);
|
|
674
|
|
675 // negative b
|
|
676 vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs);
|
|
677 bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
|
|
678 vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1)));
|
|
679
|
|
680 // pick the one we want
|
|
681 vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel);
|
|
682
|
|
683 // A) Check if the exponents are different
|
|
684 vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval);
|
|
685
|
|
686 // B) Check if high word equal, and low word greater
|
|
687 vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval);
|
|
688 vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval);
|
|
689 vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
|
|
690
|
|
691 // If either A or B is true, return true (unless NaNs detected)
|
|
692 vec_uint4 r = vec_or(gt_hi, eqgt);
|
|
693
|
|
694 // splat the high words of the comparison step
|
|
695 r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);
|
|
696
|
|
697 // correct for NaNs in input
|
|
698 return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
|
|
699 }
|
|
700
|
|
701 static __inline qword si_cgtbi(qword a, signed char b)
|
|
702 {
|
|
703 return ((qword)(vec_cmpgt((vec_char16)(a),
|
|
704 vec_splat((vec_char16)(si_from_char(b)), 3))));
|
|
705 }
|
|
706
|
|
707 static __inline qword si_cgthi(qword a, signed short b)
|
|
708 {
|
|
709 return ((qword)(vec_cmpgt((vec_short8)(a),
|
|
710 vec_splat((vec_short8)(si_from_short(b)), 1))));
|
|
711 }
|
|
712
|
|
713 static __inline qword si_cgti(qword a, signed int b)
|
|
714 {
|
|
715 return ((qword)(vec_cmpgt((vec_int4)(a),
|
|
716 vec_splat((vec_int4)(si_from_int(b)), 0))));
|
|
717 }
|
|
718
|
|
719 static __inline qword si_clgtbi(qword a, unsigned char b)
|
|
720 {
|
|
721 return ((qword)(vec_cmpgt((vec_uchar16)(a),
|
|
722 vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
|
|
723 }
|
|
724
|
|
725 static __inline qword si_clgthi(qword a, unsigned short b)
|
|
726 {
|
|
727 return ((qword)(vec_cmpgt((vec_ushort8)(a),
|
|
728 vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
|
|
729 }
|
|
730
|
|
731 static __inline qword si_clgti(qword a, unsigned int b)
|
|
732 {
|
|
733 return ((qword)(vec_cmpgt((vec_uint4)(a),
|
|
734 vec_splat((vec_uint4)(si_from_uint(b)), 0))));
|
|
735 }
|
|
736
|
|
737 static __inline qword si_dftsv(qword a, char b)
|
|
738 {
|
|
739 vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
|
740 vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
|
|
741 vec_uint4 result = (vec_uint4){0};
|
|
742 vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
|
|
743 sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi);
|
|
744 vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask);
|
|
745
|
|
746 union {
|
|
747 vec_uchar16 v;
|
|
748 int i[4];
|
|
749 } x;
|
|
750
|
|
751 /* Shift 4 bytes */
|
|
752 x.i[3] = 4 << 3;
|
|
753
|
|
754 /* Nan or +inf or -inf */
|
|
755 if (b & 0x70)
|
|
756 {
|
|
757 vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
|
|
758 vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
|
|
759 /* NaN */
|
|
760 if (b & 0x40)
|
|
761 {
|
|
762 vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
|
|
763 a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
|
|
764 a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
|
|
765 result = vec_or(result, a_nan);
|
|
766 }
|
|
767 /* inf */
|
|
768 if (b & 0x30)
|
|
769 {
|
|
770 a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf);
|
|
771 a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
|
|
772 /* +inf */
|
|
773 if (b & 0x20)
|
|
774 result = vec_or(vec_andc(a_inf, sign), result);
|
|
775 /* -inf */
|
|
776 if (b & 0x10)
|
|
777 result = vec_or(vec_and(a_inf, sign), result);
|
|
778 }
|
|
779 }
|
|
780 /* 0 or denorm */
|
|
781 if (b & 0xF)
|
|
782 {
|
|
783 vec_uint4 iszero =(vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0));
|
|
784 iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
|
|
785 /* denorm */
|
|
786 if (b & 0x3)
|
|
787 {
|
|
788 vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF};
|
|
789 vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero);
|
|
790 isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi);
|
|
791 /* +denorm */
|
|
792 if (b & 0x2)
|
|
793 result = vec_or(vec_andc(isdenorm, sign), result);
|
|
794 /* -denorm */
|
|
795 if (b & 0x1)
|
|
796 result = vec_or(vec_and(isdenorm, sign), result);
|
|
797 }
|
|
798 /* 0 */
|
|
799 if (b & 0xC)
|
|
800 {
|
|
801 iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi);
|
|
802 /* +0 */
|
|
803 if (b & 0x8)
|
|
804 result = vec_or(vec_andc(iszero, sign), result);
|
|
805 /* -0 */
|
|
806 if (b & 0x4)
|
|
807 result = vec_or(vec_and(iszero, sign), result);
|
|
808 }
|
|
809 }
|
|
810 return ((qword)result);
|
|
811 }
|
|
812
|
|
813
|
|
814 /* Carry generate
|
|
815 */
|
|
816 #define si_cg(_a, _b) ((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b))))
|
|
817
|
|
818 #define si_cgx(_a, _b, _c) ((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)), \
|
|
819 vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \
|
|
820 vec_and((vec_uint4)(_c), vec_splat_u32(1))))))
|
|
821
|
|
822
|
|
823 /* Count ones for bytes
|
|
824 */
|
|
825 static __inline qword si_cntb(qword a)
|
|
826 {
|
|
827 vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
|
|
828 vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 };
|
|
829 vec_uchar16 av;
|
|
830
|
|
831 av = (vec_uchar16)(a);
|
|
832
|
|
833 return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av),
|
|
834 vec_perm(nib_cnt, nib_cnt, vec_sr (av, four)))));
|
|
835 }
|
|
836
|
|
/* Count leading zeros
 */
|
|
839 static __inline qword si_clz(qword a)
|
|
840 {
|
|
841 vec_uchar16 av;
|
|
842 vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3;
|
|
843 vec_uchar16 four = vec_splat_u8(4);
|
|
844 vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
845 vec_uchar16 eight = vec_splat_u8(8);
|
|
846 vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
|
|
847 vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24};
|
|
848
|
|
849 av = (vec_uchar16)(a);
|
|
850
|
|
851 cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four));
|
|
852 cnt_lo = vec_perm(nib_cnt, nib_cnt, av);
|
|
853
|
|
854 cnt = vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four)));
|
|
855
|
|
856 tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight));
|
|
857 tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen));
|
|
858 tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour));
|
|
859
|
|
860 cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight)));
|
|
861 cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen)));
|
|
862 cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour)));
|
|
863
|
|
864 return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour))));
|
|
865 }
|
|
866
|
|
867 /* Convert to float
|
|
868 */
|
|
869 #define si_cuflt(_a, _b) ((qword)(vec_ctf((vec_uint4)(_a), _b)))
|
|
870 #define si_csflt(_a, _b) ((qword)(vec_ctf((vec_int4)(_a), _b)))
|
|
871
|
|
872 /* Convert to signed int
|
|
873 */
|
|
874 #define si_cflts(_a, _b) ((qword)(vec_cts((vec_float4)(_a), _b)))
|
|
875
|
|
876 /* Convert to unsigned int
|
|
877 */
|
|
878 #define si_cfltu(_a, _b) ((qword)(vec_ctu((vec_float4)(_a), _b)))
|
|
879
|
|
880 /* Synchronize
|
|
881 */
|
|
882 #define si_dsync() /* do nothing */
|
|
883 #define si_sync() /* do nothing */
|
|
884 #define si_syncc() /* do nothing */
|
|
885
|
|
886
|
|
887 /* Equivalence
|
|
888 */
|
|
889 static __inline qword si_eqv(qword a, qword b)
|
|
890 {
|
|
891 vec_uchar16 d;
|
|
892
|
|
893 d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b));
|
|
894 return ((qword)(vec_nor(d, d)));
|
|
895 }
|
|
896
|
|
897 /* Extend
|
|
898 */
|
|
899 static __inline qword si_xsbh(qword a)
|
|
900 {
|
|
901 vec_char16 av;
|
|
902
|
|
903 av = (vec_char16)(a);
|
|
904 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
|
|
905 0, 0, 0, 0, 0, 0, 0, 0})))));
|
|
906 }
|
|
907
|
|
908 static __inline qword si_xshw(qword a)
|
|
909 {
|
|
910 vec_short8 av;
|
|
911
|
|
912 av = (vec_short8)(a);
|
|
913 return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
|
|
914 10,11,14,15,
|
|
915 0, 0, 0, 0,
|
|
916 0, 0, 0, 0})))));
|
|
917 }
|
|
918
|
|
919 static __inline qword si_xswd(qword a)
|
|
920 {
|
|
921 vec_int4 av;
|
|
922
|
|
923 av = (vec_int4)(a);
|
|
924 return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
|
|
925 ((vec_uchar16){20, 21, 22, 23,
|
|
926 4, 5, 6, 7,
|
|
927 28, 29, 30, 31,
|
|
928 12, 13, 14, 15}))));
|
|
929 }
|
|
930
|
|
931 static __inline qword si_fesd(qword a)
|
|
932 {
|
|
933 union {
|
|
934 double d[2];
|
|
935 vec_double2 vd;
|
|
936 } out;
|
|
937 union {
|
|
938 float f[4];
|
|
939 vec_float4 vf;
|
|
940 } in;
|
|
941
|
|
942 in.vf = (vec_float4)(a);
|
|
943 out.d[0] = (double)(in.f[0]);
|
|
944 out.d[1] = (double)(in.f[2]);
|
|
945 return ((qword)(out.vd));
|
|
946 }
|
|
947
|
|
948 /* Gather
|
|
949 */
|
|
950 static __inline qword si_gbb(qword a)
|
|
951 {
|
|
952 vec_uchar16 bits;
|
|
953 vec_uint4 bytes;
|
|
954
|
|
955 bits = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0,
|
|
956 7, 6, 5, 4, 3, 2, 1, 0}));
|
|
957 bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0}));
|
|
958
|
|
959 return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0,
|
|
960 0, 0, 0, 0, 0, 0, 0, 0}))));
|
|
961 }
|
|
962
|
|
963
|
|
964 static __inline qword si_gbh(qword a)
|
|
965 {
|
|
966 vec_ushort8 bits;
|
|
967 vec_uint4 bytes;
|
|
968
|
|
969 bits = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0}));
|
|
970
|
|
971 bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0});
|
|
972
|
|
973 return ((qword)(vec_sld(bytes, bytes, 12)));
|
|
974 }
|
|
975
|
|
976 static __inline qword si_gb(qword a)
|
|
977 {
|
|
978 vec_uint4 bits;
|
|
979 vec_uint4 bytes;
|
|
980
|
|
981 bits = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0}));
|
|
982 bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0}));
|
|
983 return ((qword)(vec_sld(bytes, bytes, 12)));
|
|
984 }
|
|
985
|
|
986
|
|
987 /* Compare and halt
|
|
988 */
|
|
989 static __inline void si_heq(qword a, qword b)
|
|
990 {
|
|
991 union {
|
|
992 vector unsigned int v;
|
|
993 unsigned int i[4];
|
|
994 } aa, bb;
|
|
995
|
|
996 aa.v = (vector unsigned int)(a);
|
|
997 bb.v = (vector unsigned int)(b);
|
|
998
|
|
999 if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; };
|
|
1000 }
|
|
1001
|
|
1002 static __inline void si_heqi(qword a, unsigned int b)
|
|
1003 {
|
|
1004 union {
|
|
1005 vector unsigned int v;
|
|
1006 unsigned int i[4];
|
|
1007 } aa;
|
|
1008
|
|
1009 aa.v = (vector unsigned int)(a);
|
|
1010
|
|
1011 if (aa.i[0] == b) { SPU_HALT_ACTION; };
|
|
1012 }
|
|
1013
|
|
1014 static __inline void si_hgt(qword a, qword b)
|
|
1015 {
|
|
1016 union {
|
|
1017 vector signed int v;
|
|
1018 signed int i[4];
|
|
1019 } aa, bb;
|
|
1020
|
|
1021 aa.v = (vector signed int)(a);
|
|
1022 bb.v = (vector signed int)(b);
|
|
1023
|
|
1024 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
|
|
1025 }
|
|
1026
|
|
1027 static __inline void si_hgti(qword a, signed int b)
|
|
1028 {
|
|
1029 union {
|
|
1030 vector signed int v;
|
|
1031 signed int i[4];
|
|
1032 } aa;
|
|
1033
|
|
1034 aa.v = (vector signed int)(a);
|
|
1035
|
|
1036 if (aa.i[0] > b) { SPU_HALT_ACTION; };
|
|
1037 }
|
|
1038
|
|
1039 static __inline void si_hlgt(qword a, qword b)
|
|
1040 {
|
|
1041 union {
|
|
1042 vector unsigned int v;
|
|
1043 unsigned int i[4];
|
|
1044 } aa, bb;
|
|
1045
|
|
1046 aa.v = (vector unsigned int)(a);
|
|
1047 bb.v = (vector unsigned int)(b);
|
|
1048
|
|
1049 if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
|
|
1050 }
|
|
1051
|
|
1052 static __inline void si_hlgti(qword a, unsigned int b)
|
|
1053 {
|
|
1054 union {
|
|
1055 vector unsigned int v;
|
|
1056 unsigned int i[4];
|
|
1057 } aa;
|
|
1058
|
|
1059 aa.v = (vector unsigned int)(a);
|
|
1060
|
|
1061 if (aa.i[0] > b) { SPU_HALT_ACTION; };
|
|
1062 }
|
|
1063
|
|
1064
|
|
1065 /* Multiply and Add
|
|
1066 */
|
|
/* Multiply and Add: 16-bit x 16-bit multiply of the odd (low) halfwords
   plus the 32-bit addend C.  The even halfwords of A are masked to zero
   so vec_msum contributes only the odd-halfword products.  */
static __inline qword si_mpya(qword a, qword b, qword c)
{
  return ((qword)(vec_msum(vec_and((vec_short8)(a),
				   ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
			   (vec_short8)(b), (vec_int4)(c))));
}
|
|
1073
|
|
1074 static __inline qword si_fma(qword a, qword b, qword c)
|
|
1075 {
|
|
1076 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
|
|
1077 }
|
|
1078
|
|
1079 static __inline qword si_dfma(qword a, qword b, qword c)
|
|
1080 {
|
|
1081 union {
|
|
1082 vec_double2 v;
|
|
1083 double d[2];
|
|
1084 } aa, bb, cc, dd;
|
|
1085
|
|
1086 aa.v = (vec_double2)(a);
|
|
1087 bb.v = (vec_double2)(b);
|
|
1088 cc.v = (vec_double2)(c);
|
|
1089 dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0];
|
|
1090 dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1];
|
|
1091 return ((qword)(dd.v));
|
|
1092 }
|
|
1093
|
|
1094 /* Form Mask
|
|
1095 */
|
|
/* Form Select Mask Byte Immediate: expand the 16-bit immediate into a
   byte select mask via si_fsmb.  */
#define si_fsmbi(_a)	si_fsmb(si_from_int(_a))
|
|
1097
|
|
/* Form Select Mask from Bits (bytes): each of the 16 bits in halfword 1
   of A becomes a full byte of 0x00 or 0xFF in the result.  */
static __inline qword si_fsmb(qword a)
{
  vec_char16 mask;
  vec_ushort8 in;

  in = (vec_ushort8)(a);
  /* Replicate the two source bytes (bytes 2 and 3) across the result.  */
  mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2,
						      3, 3, 3, 3, 3, 3, 3, 3})));
  /* Move each byte's selector bit into the sign position, then smear it
     across the byte with an arithmetic shift.  */
  return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7,
						      0, 1, 2, 3, 4, 5, 6, 7})),
			  vec_splat_u8(7))));
}
|
|
1110
|
|
1111
|
|
1112 static __inline qword si_fsmh(qword a)
|
|
1113 {
|
|
1114 vec_uchar16 in;
|
|
1115 vec_short8 mask;
|
|
1116
|
|
1117 in = (vec_uchar16)(a);
|
|
1118 mask = (vec_short8)(vec_splat(in, 3));
|
|
1119 return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
|
|
1120 vec_splat_u16(15))));
|
|
1121 }
|
|
1122
|
|
1123 static __inline qword si_fsm(qword a)
|
|
1124 {
|
|
1125 vec_uchar16 in;
|
|
1126 vec_int4 mask;
|
|
1127
|
|
1128 in = (vec_uchar16)(a);
|
|
1129 mask = (vec_int4)(vec_splat(in, 3));
|
|
1130 return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})),
|
|
1131 ((vec_uint4){31,31,31,31}))));
|
|
1132 }
|
|
1133
|
|
1134 /* Move from/to registers
|
|
1135 */
|
|
/* FP status/control register and SPR accesses have no PPU/VMX
   equivalent: reads yield zero and writes are discarded.  */
#define si_fscrrd()		((qword)((vec_uint4){0}))
#define si_fscrwr(_a)

#define si_mfspr(_reg)		((qword)((vec_uint4){0}))
#define si_mtspr(_reg, _a)
|
|
1141
|
|
1142 /* Multiply High High Add
|
|
1143 */
|
|
1144 static __inline qword si_mpyhha(qword a, qword b, qword c)
|
|
1145 {
|
|
1146 return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c))));
|
|
1147 }
|
|
1148
|
|
1149 static __inline qword si_mpyhhau(qword a, qword b, qword c)
|
|
1150 {
|
|
1151 return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c))));
|
|
1152 }
|
|
1153
|
|
1154 /* Multiply Subtract
|
|
1155 */
|
|
1156 static __inline qword si_fms(qword a, qword b, qword c)
|
|
1157 {
|
|
1158 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
|
|
1159 vec_sub(((vec_float4){0.0f}), (vec_float4)(c)))));
|
|
1160 }
|
|
1161
|
|
1162 static __inline qword si_dfms(qword a, qword b, qword c)
|
|
1163 {
|
|
1164 union {
|
|
1165 vec_double2 v;
|
|
1166 double d[2];
|
|
1167 } aa, bb, cc, dd;
|
|
1168
|
|
1169 aa.v = (vec_double2)(a);
|
|
1170 bb.v = (vec_double2)(b);
|
|
1171 cc.v = (vec_double2)(c);
|
|
1172 dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0];
|
|
1173 dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1];
|
|
1174 return ((qword)(dd.v));
|
|
1175 }
|
|
1176
|
|
1177 /* Multiply
|
|
1178 */
|
|
1179 static __inline qword si_fm(qword a, qword b)
|
|
1180 {
|
|
1181 return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f}))));
|
|
1182 }
|
|
1183
|
|
1184 static __inline qword si_dfm(qword a, qword b)
|
|
1185 {
|
|
1186 union {
|
|
1187 vec_double2 v;
|
|
1188 double d[2];
|
|
1189 } aa, bb, dd;
|
|
1190
|
|
1191 aa.v = (vec_double2)(a);
|
|
1192 bb.v = (vec_double2)(b);
|
|
1193 dd.d[0] = aa.d[0] * bb.d[0];
|
|
1194 dd.d[1] = aa.d[1] * bb.d[1];
|
|
1195 return ((qword)(dd.v));
|
|
1196 }
|
|
1197
|
|
1198 /* Multiply High
|
|
1199 */
|
|
/* Multiply High: (high halfword of A) x (low halfword of B) << 16.
   Shifting B left 16 moves its low halfwords into the even positions so
   vec_mule pairs them with A's high halfwords; the final shift scales
   the product by 2^16 as the SPU instruction requires.  */
static __inline qword si_mpyh(qword a, qword b)
{
  vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16};

  return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen)));
}
|
|
1206
|
|
1207
|
|
1208 /* Multiply High High
|
|
1209 */
|
|
1210 static __inline qword si_mpyhh(qword a, qword b)
|
|
1211 {
|
|
1212 return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b))));
|
|
1213 }
|
|
1214
|
|
1215 static __inline qword si_mpyhhu(qword a, qword b)
|
|
1216 {
|
|
1217 return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b))));
|
|
1218 }
|
|
1219
|
|
1220 /* Multiply Odd
|
|
1221 */
|
|
1222 static __inline qword si_mpy(qword a, qword b)
|
|
1223 {
|
|
1224 return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b))));
|
|
1225 }
|
|
1226
|
|
1227 static __inline qword si_mpyu(qword a, qword b)
|
|
1228 {
|
|
1229 return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b))));
|
|
1230 }
|
|
1231
|
|
1232 static __inline qword si_mpyi(qword a, short b)
|
|
1233 {
|
|
1234 return ((qword)(vec_mulo((vec_short8)(a),
|
|
1235 vec_splat((vec_short8)(si_from_short(b)), 1))));
|
|
1236 }
|
|
1237
|
|
1238 static __inline qword si_mpyui(qword a, unsigned short b)
|
|
1239 {
|
|
1240 return ((qword)(vec_mulo((vec_ushort8)(a),
|
|
1241 vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
|
|
1242 }
|
|
1243
|
|
1244 /* Multiply and Shift Right
|
|
1245 */
|
|
1246 static __inline qword si_mpys(qword a, qword b)
|
|
1247 {
|
|
1248 return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16}))));
|
|
1249 }
|
|
1250
|
|
1251 /* Nand
|
|
1252 */
|
|
1253 static __inline qword si_nand(qword a, qword b)
|
|
1254 {
|
|
1255 vec_uchar16 d;
|
|
1256
|
|
1257 d = vec_and((vec_uchar16)(a), (vec_uchar16)(b));
|
|
1258 return ((qword)(vec_nor(d, d)));
|
|
1259 }
|
|
1260
|
|
1261 /* Negative Multiply Add
|
|
1262 */
|
|
1263 static __inline qword si_dfnma(qword a, qword b, qword c)
|
|
1264 {
|
|
1265 union {
|
|
1266 vec_double2 v;
|
|
1267 double d[2];
|
|
1268 } aa, bb, cc, dd;
|
|
1269
|
|
1270 aa.v = (vec_double2)(a);
|
|
1271 bb.v = (vec_double2)(b);
|
|
1272 cc.v = (vec_double2)(c);
|
|
1273 dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0];
|
|
1274 dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1];
|
|
1275 return ((qword)(dd.v));
|
|
1276 }
|
|
1277
|
|
1278 /* Negative Multiply and Subtract
|
|
1279 */
|
|
1280 static __inline qword si_fnms(qword a, qword b, qword c)
|
|
1281 {
|
|
1282 return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
|
|
1283 }
|
|
1284
|
|
1285 static __inline qword si_dfnms(qword a, qword b, qword c)
|
|
1286 {
|
|
1287 union {
|
|
1288 vec_double2 v;
|
|
1289 double d[2];
|
|
1290 } aa, bb, cc, dd;
|
|
1291
|
|
1292 aa.v = (vec_double2)(a);
|
|
1293 bb.v = (vec_double2)(b);
|
|
1294 cc.v = (vec_double2)(c);
|
|
1295 dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0];
|
|
1296 dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1];
|
|
1297 return ((qword)(dd.v));
|
|
1298 }
|
|
1299
|
|
1300 /* Nor
|
|
1301 */
|
|
1302 static __inline qword si_nor(qword a, qword b)
|
|
1303 {
|
|
1304 return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b))));
|
|
1305 }
|
|
1306
|
|
1307 /* Or
|
|
1308 */
|
|
1309 static __inline qword si_or(qword a, qword b)
|
|
1310 {
|
|
1311 return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b))));
|
|
1312 }
|
|
1313
|
|
1314 static __inline qword si_orbi(qword a, unsigned char b)
|
|
1315 {
|
|
1316 return ((qword)(vec_or((vec_uchar16)(a),
|
|
1317 vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
|
|
1318 }
|
|
1319
|
|
1320 static __inline qword si_orhi(qword a, unsigned short b)
|
|
1321 {
|
|
1322 return ((qword)(vec_or((vec_ushort8)(a),
|
|
1323 vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
|
|
1324 }
|
|
1325
|
|
1326 static __inline qword si_ori(qword a, unsigned int b)
|
|
1327 {
|
|
1328 return ((qword)(vec_or((vec_uint4)(a),
|
|
1329 vec_splat((vec_uint4)(si_from_uint(b)), 0))));
|
|
1330 }
|
|
1331
|
|
1332 /* Or Complement
|
|
1333 */
|
|
1334 static __inline qword si_orc(qword a, qword b)
|
|
1335 {
|
|
1336 return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b)))));
|
|
1337 }
|
|
1338
|
|
1339
|
|
1340 /* Or Across
|
|
1341 */
|
|
1342 static __inline qword si_orx(qword a)
|
|
1343 {
|
|
1344 vec_uchar16 tmp;
|
|
1345 tmp = (vec_uchar16)(a);
|
|
1346 tmp = vec_or(tmp, vec_sld(tmp, tmp, 8));
|
|
1347 tmp = vec_or(tmp, vec_sld(tmp, tmp, 4));
|
|
1348 return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00,
|
|
1349 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}))));
|
|
1350 }
|
|
1351
|
|
1352
|
|
1353 /* Estimates
|
|
1354 */
|
|
1355 static __inline qword si_frest(qword a)
|
|
1356 {
|
|
1357 return ((qword)(vec_re((vec_float4)(a))));
|
|
1358 }
|
|
1359
|
|
1360 static __inline qword si_frsqest(qword a)
|
|
1361 {
|
|
1362 return ((qword)(vec_rsqrte((vec_float4)(a))));
|
|
1363 }
|
|
1364
|
|
/* Floating Interpolate has no VMX mapping; the estimate operand _d is
   returned unchanged.  */
#define si_fi(_a, _d)		(_d)
|
|
1366
|
|
1367 /* Channel Read and Write
|
|
1368 */
|
|
/* SPU channel instructions have no PPU/VMX mapping; reads yield zero
   and writes are discarded.  */
#define si_rdch(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
#define si_rchcnt(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
#define si_wrch(_channel, _a)		/* not mappable */
|
|
1372
|
|
1373 /* Rotate Left
|
|
1374 */
|
|
1375 static __inline qword si_roth(qword a, qword b)
|
|
1376 {
|
|
1377 return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b))));
|
|
1378 }
|
|
1379
|
|
1380 static __inline qword si_rot(qword a, qword b)
|
|
1381 {
|
|
1382 return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b))));
|
|
1383 }
|
|
1384
|
|
1385 static __inline qword si_rothi(qword a, int b)
|
|
1386 {
|
|
1387 return ((qword)(vec_rl((vec_ushort8)(a),
|
|
1388 vec_splat((vec_ushort8)(si_from_int(b)), 1))));
|
|
1389 }
|
|
1390
|
|
1391 static __inline qword si_roti(qword a, int b)
|
|
1392 {
|
|
1393 return ((qword)(vec_rl((vec_uint4)(a),
|
|
1394 vec_splat((vec_uint4)(si_from_int(b)), 0))));
|
|
1395 }
|
|
1396
|
|
1397 /* Rotate Left with Mask
|
|
1398 */
|
|
/* Rotate and Mask (logical shift right) by halfwords.  B holds negated
   shift counts; MASK zeroes elements whose magnitude has bit 4 set
   (count >= 16), matching the SPU 5-bit shift range.  */
static __inline qword si_rothm(qword a, qword b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
  /* Shift bit 4 of the count into the sign position, smear it.  */
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
}
|
|
1408
|
|
/* Rotate and Mask (logical shift right) by words.  B holds negated
   shift counts; MASK zeroes elements whose magnitude has bit 5 set
   (count >= 32), matching the SPU 6-bit shift range.  */
static __inline qword si_rotm(qword a, qword b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
  /* Shift bit 5 of the count into the sign position, smear it.  */
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
}
|
|
1418
|
|
/* Rotate and Mask Halfword Immediate: logical shift right of every
   halfword by -B; counts >= 16 produce zero via MASK.  */
static __inline qword si_rothmi(qword a, int b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
  /* Smear bit 4 of the count to zero out over-shifted elements.  */
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
}
|
|
1428
|
|
/* Rotate and Mask Word Immediate: logical shift right of every word by
   -B; counts >= 32 produce zero via MASK.  */
static __inline qword si_rotmi(qword a, int b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
  /* Smear bit 5 of the count to zero out over-shifted elements.  */
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
}
|
|
1438
|
|
1439
|
|
1440 /* Rotate Left Algebraic with Mask
|
|
1441 */
|
|
/* Rotate and Mask Algebraic by halfwords: arithmetic shift right by the
   negated counts in B.  OR-ing MASK saturates counts >= 16 to the
   maximum VMX shift of 15, replicating the sign bit as the SPU does.  */
static __inline qword si_rotmah(qword a, qword b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}
|
|
1451
|
|
/* Rotate and Mask Algebraic by words: arithmetic shift right by the
   negated counts in B.  OR-ing MASK saturates counts >= 32 to the
   maximum VMX shift of 31, replicating the sign bit as the SPU does.  */
static __inline qword si_rotma(qword a, qword b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}
|
|
1461
|
|
1462
|
|
/* Rotate and Mask Algebraic Halfword Immediate: arithmetic shift right
   by -B, with counts >= 16 saturated to 15 via MASK.  */
static __inline qword si_rotmahi(qword a, int b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}
|
|
1472
|
|
/* Rotate and Mask Algebraic Word Immediate: arithmetic shift right by
   -B, with counts >= 32 saturated to 31 via MASK.  */
static __inline qword si_rotmai(qword a, int b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}
|
|
1482
|
|
1483
|
|
1484 /* Rotate Left Quadword by Bytes with Mask
|
|
1485 */
|
|
1486 static __inline qword si_rotqmbyi(qword a, int count)
|
|
1487 {
|
|
1488 union {
|
|
1489 vec_uchar16 v;
|
|
1490 int i[4];
|
|
1491 } x;
|
|
1492 vec_uchar16 mask;
|
|
1493
|
|
1494 count = 0 - count;
|
|
1495 x.i[3] = count << 3;
|
|
1496 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
|
|
1497
|
|
1498 return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
|
|
1499 }
|
|
1500
|
|
1501
|
|
/* Rotate (shift) quadword right by bytes, count taken from the
   preferred word of COUNT (stored negated); counts >= 16 yield zero.  */
static __inline qword si_rotqmby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  /* Negate and convert bytes to bits; keep the scalar for the test.  */
  x.i[0] = cnt = (0 - x.i[0]) << 3;

  /* vec_sro reads the shift amount from the low-order byte; splat the
     low byte of word 0 (byte 3, big-endian) across the vector.  */
  x.v = vec_splat(x.v, 3);
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
|
|
1519
|
|
1520
|
|
1521 /* Rotate Left Quadword by Bytes
|
|
1522 */
|
|
1523 static __inline qword si_rotqbyi(qword a, int count)
|
|
1524 {
|
|
1525 union {
|
|
1526 vec_uchar16 v;
|
|
1527 int i[4];
|
|
1528 } left, right;
|
|
1529
|
|
1530 count <<= 3;
|
|
1531 left.i[3] = count;
|
|
1532 right.i[3] = 0 - count;
|
|
1533 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v))));
|
|
1534 }
|
|
1535
|
|
1536 static __inline qword si_rotqby(qword a, qword count)
|
|
1537 {
|
|
1538 vec_uchar16 left, right;
|
|
1539
|
|
1540 left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
|
|
1541 right = vec_sub(vec_splat_u8(0), left);
|
|
1542 return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
|
|
1543 }
|
|
1544
|
|
1545 /* Rotate Left Quadword by Bytes Bit Count
|
|
1546 */
|
|
/* Rotate quadword left by bytes, the count supplied as a bit count
   (bits 3..6 of the low byte select the byte rotate amount).  */
static __inline qword si_rotqbybi(qword a, qword count)
{
  vec_uchar16 left, right;

  left = vec_splat((vec_uchar16)(count), 3);
  /* NOTE(review): right = 7 - left pairs with vec_sro so the two octet
     shifts compose into a byte rotate; confirm against the SPU
     rotqbybi definition before modifying.  */
  right = vec_sub(vec_splat_u8(7), left);
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
}
|
|
1555
|
|
1556
|
|
1557 /* Rotate Left Quadword by Bytes Bit Count
|
|
1558 */
|
|
/* Rotate quadword left by COUNT bits (0..7): the wrapped-around high
   bits are recovered by shifting the quadword right 120 bits and then
   right (8 - count) more, and OR'd into the left-shifted value.  */
static __inline qword si_rotqbii(qword a, int count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3);
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
			   (vec_uint4)vec_sub(vec_splat_u8(8), x)));
  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}
|
|
1570
|
|
/* Rotate quadword left by the bit count (0..7) in COUNT's preferred
   word; same wrap-around recovery scheme as si_rotqbii.  */
static __inline qword si_rotqbi(qword a, qword count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7));
  /* Recover the bits that wrap from the top into the bottom.  */
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
			   (vec_uint4)vec_sub(vec_splat_u8(8), x)));

  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}
|
|
1583
|
|
1584
|
|
1585 /* Rotate Left Quadword and Mask by Bits
|
|
1586 */
|
|
1587 static __inline qword si_rotqmbii(qword a, int count)
|
|
1588 {
|
|
1589 return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3))));
|
|
1590 }
|
|
1591
|
|
1592 static __inline qword si_rotqmbi(qword a, qword count)
|
|
1593 {
|
|
1594 return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), vec_splat((vec_uchar16)(count), 3)))));
|
|
1595 }
|
|
1596
|
|
1597
|
|
1598 /* Rotate Left Quadword and Mask by Bytes with Bit Count
|
|
1599 */
|
|
/* Rotate (shift) quadword right by bytes, the count supplied as a bit
   count; the sub-byte bits are discarded (& ~7) and counts >= 16 bytes
   yield zero.  */
static __inline qword si_rotqmbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  /* Drop the bit portion and negate for a right shift.  */
  x.i[0] = cnt = 0 - (x.i[0] & ~7);
  x.v = vec_splat(x.v, 3);
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
|
|
1616
|
|
1617
|
|
1618
|
|
1619
|
|
1620 /* Round Double to Float
|
|
1621 */
|
|
1622 static __inline qword si_frds(qword a)
|
|
1623 {
|
|
1624 union {
|
|
1625 vec_float4 v;
|
|
1626 float f[4];
|
|
1627 } d;
|
|
1628 union {
|
|
1629 vec_double2 v;
|
|
1630 double d[2];
|
|
1631 } in;
|
|
1632
|
|
1633 in.v = (vec_double2)(a);
|
|
1634 d.v = (vec_float4){0.0f};
|
|
1635 d.f[0] = (float)in.d[0];
|
|
1636 d.f[2] = (float)in.d[1];
|
|
1637
|
|
1638 return ((qword)(d.v));
|
|
1639 }
|
|
1640
|
|
1641 /* Select Bits
|
|
1642 */
|
|
1643 static __inline qword si_selb(qword a, qword b, qword c)
|
|
1644 {
|
|
1645 return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c))));
|
|
1646 }
|
|
1647
|
|
1648
|
|
1649 /* Shuffle Bytes
|
|
1650 */
|
|
/* Shuffle Bytes with SPU special-value semantics: pattern bytes with the
   top bit set select a constant (0x00, 0xFF, or 0x80) instead of data.
   A second vec_perm substitutes those constants from a lookup vector.  */
static __inline qword si_shufb(qword a, qword b, qword pattern)
{
  vec_uchar16 pat;

  /* Where the pattern's sign bit is set, replace the index with the
     top 5 bits (pattern >> 3), which address the constant table below;
     otherwise keep the identity index into the first vec_perm result.  */
  pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
		vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)),
		vec_sra((vec_uchar16)(pattern), vec_splat_u8(7)));
  return ((qword)(vec_perm(vec_perm(a, b, pattern),
			   ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0,
					  0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),
			   pat)));
}
|
|
1663
|
|
1664
|
|
1665 /* Shift Left
|
|
1666 */
|
|
1667 static __inline qword si_shlh(qword a, qword b)
|
|
1668 {
|
|
1669 vec_ushort8 mask;
|
|
1670
|
|
1671 mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15));
|
|
1672 return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask)));
|
|
1673 }
|
|
1674
|
|
1675 static __inline qword si_shl(qword a, qword b)
|
|
1676 {
|
|
1677 vec_uint4 mask;
|
|
1678
|
|
1679 mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
|
|
1680 return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask)));
|
|
1681 }
|
|
1682
|
|
1683
|
|
1684 static __inline qword si_shlhi(qword a, unsigned int b)
|
|
1685 {
|
|
1686 vec_ushort8 mask;
|
|
1687 vec_ushort8 bv;
|
|
1688
|
|
1689 bv = vec_splat((vec_ushort8)(si_from_int(b)), 1);
|
|
1690 mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15));
|
|
1691 return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask)));
|
|
1692 }
|
|
1693
|
|
1694 static __inline qword si_shli(qword a, unsigned int b)
|
|
1695 {
|
|
1696 vec_uint4 bv;
|
|
1697 vec_uint4 mask;
|
|
1698
|
|
1699 bv = vec_splat((vec_uint4)(si_from_uint(b)), 0);
|
|
1700 mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
|
|
1701 return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask)));
|
|
1702 }
|
|
1703
|
|
1704
|
|
1705 /* Shift Left Quadword
|
|
1706 */
|
|
1707 static __inline qword si_shlqbii(qword a, unsigned int count)
|
|
1708 {
|
|
1709 vec_uchar16 x;
|
|
1710
|
|
1711 x = vec_splat((vec_uchar16)(si_from_uint(count)), 3);
|
|
1712 return ((qword)(vec_sll((vec_uchar16)(a), x)));
|
|
1713 }
|
|
1714
|
|
1715 static __inline qword si_shlqbi(qword a, qword count)
|
|
1716 {
|
|
1717 vec_uchar16 x;
|
|
1718
|
|
1719 x = vec_splat((vec_uchar16)(count), 3);
|
|
1720 return ((qword)(vec_sll((vec_uchar16)(a), x)));
|
|
1721 }
|
|
1722
|
|
1723
|
|
1724 /* Shift Left Quadword by Bytes
|
|
1725 */
|
|
1726 static __inline qword si_shlqbyi(qword a, unsigned int count)
|
|
1727 {
|
|
1728 union {
|
|
1729 vec_uchar16 v;
|
|
1730 int i[4];
|
|
1731 } x;
|
|
1732 vec_uchar16 mask;
|
|
1733
|
|
1734 x.i[3] = count << 3;
|
|
1735 mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
|
|
1736 return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
|
|
1737 }
|
|
1738
|
|
/* Shift Left Quadword by the byte count in COUNT's preferred word;
   counts >= 16 yield zero.  */
static __inline qword si_shlqby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    unsigned int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  /* Splat the low byte of word 0, scaled from bytes to bits.  */
  x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
|
|
1753
|
|
1754 /* Shift Left Quadword by Bytes with Bit Count
|
|
1755 */
|
|
/* Shift Left Quadword by Bytes from a Bit Count: vec_slo uses only
   bits 121..124, i.e. the byte portion of the count; counts >= 16
   bytes yield zero.  */
static __inline qword si_shlqbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  x.v = vec_splat((vec_uchar16)(count), 3);
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
|
|
1770
|
|
1771
|
|
1772 /* Stop and Signal
|
|
1773 */
|
|
/* Stop and Signal: emulated with the user-overridable SPU_STOP_ACTION
   (default abort(); see the top of this header).  */
#define si_stop(_type)		SPU_STOP_ACTION
#define si_stopd(a, b, c)	SPU_STOP_ACTION
|
|
1776
|
|
1777
|
|
1778 /* Subtract
|
|
1779 */
|
|
1780 static __inline qword si_sfh(qword a, qword b)
|
|
1781 {
|
|
1782 return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a))));
|
|
1783 }
|
|
1784
|
|
1785 static __inline qword si_sf(qword a, qword b)
|
|
1786 {
|
|
1787 return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a))));
|
|
1788 }
|
|
1789
|
|
1790 static __inline qword si_fs(qword a, qword b)
|
|
1791 {
|
|
1792 return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b))));
|
|
1793 }
|
|
1794
|
|
1795 static __inline qword si_dfs(qword a, qword b)
|
|
1796 {
|
|
1797 union {
|
|
1798 vec_double2 v;
|
|
1799 double d[2];
|
|
1800 } aa, bb, dd;
|
|
1801
|
|
1802 aa.v = (vec_double2)(a);
|
|
1803 bb.v = (vec_double2)(b);
|
|
1804 dd.d[0] = aa.d[0] - bb.d[0];
|
|
1805 dd.d[1] = aa.d[1] - bb.d[1];
|
|
1806 return ((qword)(dd.v));
|
|
1807 }
|
|
1808
|
|
1809 static __inline qword si_sfhi(qword a, short b)
|
|
1810 {
|
|
1811 return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1),
|
|
1812 (vec_short8)(a))));
|
|
1813 }
|
|
1814
|
|
1815 static __inline qword si_sfi(qword a, int b)
|
|
1816 {
|
|
1817 return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0),
|
|
1818 (vec_int4)(a))));
|
|
1819 }
|
|
1820
|
|
1821 /* Subtract word extended
|
|
1822 */
|
|
/* Subtract From Extended: _b - _a with borrow, computed as
   _b + ~_a + (_c & 1).  */
#define si_sfx(_a, _b, _c)	((qword)(vec_add(vec_add((vec_uint4)(_b),			\
							 vec_nor((vec_uint4)(_a), (vec_uint4)(_a))),	\
						 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
|
|
1826
|
|
1827
|
|
1828 /* Sum Bytes into Shorts
|
|
1829 */
|
|
/* Sum Bytes into Halfwords: each result word holds the 4-byte sum of B
   in its upper halfword and the 4-byte sum of A in its lower halfword.  */
static __inline qword si_sumb(qword a, qword b)
{
  vec_uint4 zero = (vec_uint4){0};
  vec_ushort8 sum_a, sum_b;

  /* vec_sum4s leaves each 4-byte sum in the low halfword of its word.  */
  sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero);
  sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero);

  /* Interleave B's sums (upper) with A's sums (lower) per word.  */
  return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19, 2, 3, 22, 23, 6, 7,
							26, 27, 10, 11, 30, 31, 14, 15}))));
}
|
|
1841
|
|
1842 /* Exclusive OR
|
|
1843 */
|
|
1844 static __inline qword si_xor(qword a, qword b)
|
|
1845 {
|
|
1846 return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b))));
|
|
1847 }
|
|
1848
|
|
1849 static __inline qword si_xorbi(qword a, unsigned char b)
|
|
1850 {
|
|
1851 return ((qword)(vec_xor((vec_uchar16)(a),
|
|
1852 vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
|
|
1853 }
|
|
1854
|
|
1855 static __inline qword si_xorhi(qword a, unsigned short b)
|
|
1856 {
|
|
1857 return ((qword)(vec_xor((vec_ushort8)(a),
|
|
1858 vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
|
|
1859 }
|
|
1860
|
|
1861 static __inline qword si_xori(qword a, unsigned int b)
|
|
1862 {
|
|
1863 return ((qword)(vec_xor((vec_uint4)(a),
|
|
1864 vec_splat((vec_uint4)(si_from_uint(b)), 0))));
|
|
1865 }
|
|
1866
|
|
1867
|
|
1868 /* Generate Controls for Sub-Quadword Insertion
|
|
1869 */
|
|
1870 static __inline qword si_cbd(qword a, int imm)
|
|
1871 {
|
|
1872 union {
|
|
1873 vec_uint4 v;
|
|
1874 unsigned char c[16];
|
|
1875 } shmask;
|
|
1876
|
|
1877 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1878 shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03;
|
|
1879 return ((qword)(shmask.v));
|
|
1880 }
|
|
1881
|
|
1882 static __inline qword si_cdd(qword a, int imm)
|
|
1883 {
|
|
1884 union {
|
|
1885 vec_uint4 v;
|
|
1886 unsigned long long ll[2];
|
|
1887 } shmask;
|
|
1888
|
|
1889 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1890 shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL;
|
|
1891 return ((qword)(shmask.v));
|
|
1892 }
|
|
1893
|
|
1894 static __inline qword si_chd(qword a, int imm)
|
|
1895 {
|
|
1896 union {
|
|
1897 vec_uint4 v;
|
|
1898 unsigned short s[8];
|
|
1899 } shmask;
|
|
1900
|
|
1901 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1902 shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203;
|
|
1903 return ((qword)(shmask.v));
|
|
1904 }
|
|
1905
|
|
1906 static __inline qword si_cwd(qword a, int imm)
|
|
1907 {
|
|
1908 union {
|
|
1909 vec_uint4 v;
|
|
1910 unsigned int i[4];
|
|
1911 } shmask;
|
|
1912
|
|
1913 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1914 shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203;
|
|
1915 return ((qword)(shmask.v));
|
|
1916 }
|
|
1917
|
|
1918 static __inline qword si_cbx(qword a, qword b)
|
|
1919 {
|
|
1920 union {
|
|
1921 vec_uint4 v;
|
|
1922 unsigned char c[16];
|
|
1923 } shmask;
|
|
1924
|
|
1925 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1926 shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03;
|
|
1927 return ((qword)(shmask.v));
|
|
1928 }
|
|
1929
|
|
1930
|
|
1931 static __inline qword si_cdx(qword a, qword b)
|
|
1932 {
|
|
1933 union {
|
|
1934 vec_uint4 v;
|
|
1935 unsigned long long ll[2];
|
|
1936 } shmask;
|
|
1937
|
|
1938 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1939 shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL;
|
|
1940 return ((qword)(shmask.v));
|
|
1941 }
|
|
1942
|
|
1943 static __inline qword si_chx(qword a, qword b)
|
|
1944 {
|
|
1945 union {
|
|
1946 vec_uint4 v;
|
|
1947 unsigned short s[8];
|
|
1948 } shmask;
|
|
1949
|
|
1950 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1951 shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203;
|
|
1952 return ((qword)(shmask.v));
|
|
1953 }
|
|
1954
|
|
1955 static __inline qword si_cwx(qword a, qword b)
|
|
1956 {
|
|
1957 union {
|
|
1958 vec_uint4 v;
|
|
1959 unsigned int i[4];
|
|
1960 } shmask;
|
|
1961
|
|
1962 shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
|
|
1963 shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203;
|
|
1964 return ((qword)(shmask.v));
|
|
1965 }
|
|
1966
|
|
1967
|
|
1968 /* Constant Formation
|
|
1969 */
|
|
1970 static __inline qword si_il(signed short imm)
|
|
1971 {
|
|
1972 return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0)));
|
|
1973 }
|
|
1974
|
|
1975
|
|
1976 static __inline qword si_ila(unsigned int imm)
|
|
1977 {
|
|
1978 return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0)));
|
|
1979 }
|
|
1980
|
|
1981 static __inline qword si_ilh(signed short imm)
|
|
1982 {
|
|
1983 return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1)));
|
|
1984 }
|
|
1985
|
|
1986 static __inline qword si_ilhu(signed short imm)
|
|
1987 {
|
|
1988 return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0)));
|
|
1989 }
|
|
1990
|
|
1991 static __inline qword si_iohl(qword a, unsigned short imm)
|
|
1992 {
|
|
1993 return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0))));
|
|
1994 }
|
|
1995
|
|
1996 /* No Operation
|
|
1997 */
|
|
/* Scheduling nops are meaningless on the PPU; both expand to nothing.  */
#define si_lnop()		/* do nothing */
#define si_nop()		/* do nothing */
|
|
2000
|
|
2001
|
|
2002 /* Memory Load and Store
|
|
2003 */
|
|
2004 static __inline qword si_lqa(unsigned int imm)
|
|
2005 {
|
|
2006 return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
|
|
2007 }
|
|
2008
|
|
/* Load Quadword d-form: load from base IMM plus offset A.
   NOTE(review): only the offset A is masked to 16 bytes here, not the
   final sum; this assumes IMM is quadword aligned — confirm callers.  */
static __inline qword si_lqd(qword a, unsigned int imm)
{
  return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm))));
}
|
|
2013
|
|
2014 static __inline qword si_lqr(unsigned int imm)
|
|
2015 {
|
|
2016 return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
|
|
2017 }
|
|
2018
|
|
2019 static __inline qword si_lqx(qword a, qword b)
|
|
2020 {
|
|
2021 return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0))));
|
|
2022 }
|
|
2023
|
|
2024 static __inline void si_stqa(qword a, unsigned int imm)
|
|
2025 {
|
|
2026 vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
|
|
2027 }
|
|
2028
|
|
/* Store Quadword d-form: store to base IMM plus offset B.
   NOTE(review): as in si_lqd, only the offset is masked to 16 bytes;
   IMM is assumed quadword aligned — confirm callers.  */
static __inline void si_stqd(qword a, qword b, unsigned int imm)
{
  vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm));
}
|
|
2033
|
|
2034 static __inline void si_stqr(qword a, unsigned int imm)
|
|
2035 {
|
|
2036 vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
|
|
2037 }
|
|
2038
|
|
2039 static __inline void si_stqx(qword a, qword b, qword c)
|
|
2040 {
|
|
2041 vec_st((vec_uchar16)(a),
|
|
2042 si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))),
|
|
2043 (vector unsigned char *)(0));
|
|
2044 }
|
|
2045
|
|
2046 #endif /* !__SPU__ */
|
|
2047 #endif /* !_SI2VMX_H_ */
|
|
2048
|