Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/spu/spu.md @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 3bfb6c00c1e0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 ;; Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. | |
2 | |
3 ;; This file is free software; you can redistribute it and/or modify it under | |
4 ;; the terms of the GNU General Public License as published by the Free | |
5 ;; Software Foundation; either version 3 of the License, or (at your option) | |
6 ;; any later version. | |
7 | |
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT | |
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
11 ;; for more details. | |
12 | |
13 ;; You should have received a copy of the GNU General Public License | |
14 ;; along with GCC; see the file COPYING3. If not see | |
15 ;; <http://www.gnu.org/licenses/>. | |
16 | |
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et al. | |
18 | |
19 | |
20 ;; Define the insn "type" attribute. The define_insn_reservation entries | |
21 ;; later in this file select on it to model the pipelines. | |
22 ;; multi0 is a multiple insn rtl whose first insn is in pipe0. | |
23 ;; multi1 is a multiple insn rtl whose first insn is in pipe1. | |
;; Insns that do not set "type" explicitly default to "fx2".
24 (define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert" | |
25 (const_string "fx2")) | |
26 | |
27 ;; Length (in bytes). Defaults to 4 when a pattern does not override it. | |
28 (define_attr "length" "" | |
29 (const_int 4)) | |
30 | |
;; Tuning target ("cell" or "celledp"); the value is read from the C
;; symbol spu_tune.
31 (define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune"))) | |
32 ;; Processor type -- this attribute must exactly match the processor_type | |
33 ;; enumeration in spu.h. The value is read from the C symbol spu_cpu_attr. | |
34 | |
35 (define_attr "cpu" "spu" | |
36 (const (symbol_ref "spu_cpu_attr"))) | |
37 | |
38 ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY | |
39 ; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST]) | |
;; NOTE(review): the synopsis above names define_function_unit, but the
;; description that follows is written with define_cpu_unit and
;; define_insn_reservation -- confirm whether the synopsis is stale.
40 | |
;; Scheduling units referenced by the reservation strings below.
41 (define_cpu_unit "pipe0,pipe1,fp,ls") | |
42 | |
;; Each reservation selects insns by their "type" attribute (and, for
;; fpd, also by "tune"); the number after the name is the latency given
;; for that reservation.
43 (define_insn_reservation "NOP" 1 (eq_attr "type" "nop") | |
44 "pipe0") | |
45 | |
46 (define_insn_reservation "FX2" 2 (eq_attr "type" "fx2") | |
47 "pipe0, nothing") | |
48 | |
49 (define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb") | |
50 "pipe0, nothing*3") | |
51 | |
52 (define_insn_reservation "FP6" 6 (eq_attr "type" "fp6") | |
53 "pipe0 + fp, nothing*5") | |
54 | |
55 (define_insn_reservation "FP7" 7 (eq_attr "type" "fp7") | |
56 "pipe0, fp, nothing*5") | |
57 | |
58 ;; The behavior of the double precision is that both pipes stall | |
59 ;; for 6 cycles and the rest of the operation pipelines for | |
60 ;; 7 cycles. The simplest way to model this is to simply ignore | |
61 ;; the 6 cycle stall. | |
62 (define_insn_reservation "FPD" 7 | |
63 (and (eq_attr "tune" "cell") | |
64 (eq_attr "type" "fpd")) | |
65 "pipe0 + pipe1, fp, nothing*5") | |
66 | |
67 ;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined | |
68 (define_insn_reservation "FPD_CELLEDP" 9 | |
69 (and (eq_attr "tune" "celledp") | |
70 (eq_attr "type" "fpd")) | |
71 "pipe0 + fp, nothing*8") | |
72 | |
73 (define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop") | |
74 "pipe1") | |
75 | |
76 (define_insn_reservation "STORE" 1 (eq_attr "type" "store") | |
77 "pipe1 + ls") | |
78 | |
79 (define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch") | |
80 "pipe1 + ls") | |
81 | |
82 (define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr") | |
83 "pipe1, nothing*3") | |
84 | |
85 (define_insn_reservation "LOAD" 6 (eq_attr "type" "load") | |
86 "pipe1 + ls, nothing*5") | |
87 | |
88 (define_insn_reservation "HBR" 18 (eq_attr "type" "hbr") | |
89 "pipe1, nothing*15") | |
90 | |
;; multi0 reserves both pipes in its first cycle; multi1 only pipe1.
91 (define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0") | |
92 "pipe0+pipe1, nothing*3") | |
93 | |
94 (define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1") | |
95 "pipe1, nothing*3") | |
96 | |
;; "convert" insns have latency 0 and reserve no unit.
97 (define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert") | |
98 "nothing") | |
99 | |
100 ;; Force pipe0 to occur before pipe 1 in a cycle. | |
101 (absence_set "pipe0" "pipe1") | |
102 | |
103 | |
;; Numeric codes for the unspec operations named by patterns in this file.
;; The sequence skips 41 (UNSPEC_SET_INTR is 40, UNSPEC_FSCRRD is 42).
104 (define_constants [ | |
105 (UNSPEC_BLOCKAGE 0) | |
106 (UNSPEC_IPREFETCH 1) | |
107 (UNSPEC_FREST 2) | |
108 (UNSPEC_FRSQEST 3) | |
109 (UNSPEC_FI 4) | |
110 (UNSPEC_EXTEND_CMP 5) | |
111 (UNSPEC_CG 6) | |
112 (UNSPEC_CGX 7) | |
113 (UNSPEC_ADDX 8) | |
114 (UNSPEC_BG 9) | |
115 (UNSPEC_BGX 10) | |
116 (UNSPEC_SFX 11) | |
117 (UNSPEC_FSM 12) | |
118 (UNSPEC_HBR 13) | |
119 (UNSPEC_LNOP 14) | |
120 (UNSPEC_NOP 15) | |
121 (UNSPEC_CONVERT 16) | |
122 (UNSPEC_SELB 17) | |
123 (UNSPEC_SHUFB 18) | |
124 (UNSPEC_CPAT 19) | |
125 (UNSPEC_SYNC 20) | |
126 (UNSPEC_CNTB 21) | |
127 (UNSPEC_SUMB 22) | |
128 (UNSPEC_FSMB 23) | |
129 (UNSPEC_FSMH 24) | |
130 (UNSPEC_GBB 25) | |
131 (UNSPEC_GBH 26) | |
132 (UNSPEC_GB 27) | |
133 (UNSPEC_AVGB 28) | |
134 (UNSPEC_ABSDB 29) | |
135 (UNSPEC_ORX 30) | |
136 (UNSPEC_HEQ 31) | |
137 (UNSPEC_HGT 32) | |
138 (UNSPEC_HLGT 33) | |
139 (UNSPEC_CSFLT 34) | |
140 (UNSPEC_CFLTS 35) | |
141 (UNSPEC_CUFLT 36) | |
142 (UNSPEC_CFLTU 37) | |
143 (UNSPEC_STOP 38) | |
144 (UNSPEC_STOPD 39) | |
145 (UNSPEC_SET_INTR 40) | |
146 (UNSPEC_FSCRRD 42) | |
147 (UNSPEC_FSCRWR 43) | |
148 (UNSPEC_MFSPR 44) | |
149 (UNSPEC_MTSPR 45) | |
150 (UNSPEC_RDCH 46) | |
151 (UNSPEC_RCHCNT 47) | |
152 (UNSPEC_WRCH 48) | |
153 (UNSPEC_SPU_REALIGN_LOAD 49) | |
154 (UNSPEC_SPU_MASK_FOR_LOAD 50) | |
155 (UNSPEC_DFTSV 51) | |
156 (UNSPEC_FLOAT_EXTEND 52) | |
157 (UNSPEC_FLOAT_TRUNCATE 53) | |
158 (UNSPEC_SP_SET 54) | |
159 (UNSPEC_SP_TEST 55) | |
160 ]) | |
161 | |
162 (include "predicates.md") | |
163 (include "constraints.md") | |
164 | |
165 | |
166 ;; Mode iterators | |
167 | |
;; Every scalar and vector mode accepted by the mov<mode> expander.
168 (define_mode_iterator ALL [QI V16QI | |
169 HI V8HI | |
170 SI V4SI | |
171 DI V2DI | |
172 TI | |
173 SF V4SF | |
174 DF V2DF]) | |
175 | |
176 ; Everything except DI and TI which are handled separately because | |
177 ; they need different constraints to correctly test VOIDmode constants | |
178 (define_mode_iterator MOV [QI V16QI | |
179 HI V8HI | |
180 SI V4SI | |
181 V2DI | |
182 SF V4SF | |
183 DF V2DF]) | |
184 | |
;; The two modes left out of MOV.
185 (define_mode_iterator DTI [DI TI]) | |
186 | |
;; Integer modes, scalar and vector.
187 (define_mode_iterator VINT [QI V16QI | |
188 HI V8HI | |
189 SI V4SI | |
190 DI V2DI | |
191 TI]) | |
192 | |
193 (define_mode_iterator VQHSI [QI V16QI | |
194 HI V8HI | |
195 SI V4SI]) | |
196 | |
;; Used by the a<bh>/a<bh>i add<mode>3 insn.
197 (define_mode_iterator VHSI [HI V8HI | |
198 SI V4SI]) | |
199 | |
200 (define_mode_iterator VSDF [SF V4SF | |
201 DF V2DF]) | |
202 | |
;; Scalar/vector pairs of a single element mode.
203 (define_mode_iterator VSI [SI V4SI]) | |
204 (define_mode_iterator VDI [DI V2DI]) | |
205 (define_mode_iterator VSF [SF V4SF]) | |
206 (define_mode_iterator VDF [DF V2DF]) | |
207 | |
208 (define_mode_iterator VCMP [V16QI | |
209 V8HI | |
210 V4SI | |
211 V4SF | |
212 V2DF]) | |
213 | |
214 (define_mode_iterator VCMPU [V16QI | |
215 V8HI | |
216 V4SI]) | |
217 | |
;; "v" for the vector modes, empty for the scalar ones.
218 (define_mode_attr v [(V8HI "v") (V4SI "v") | |
219 (HI "") (SI "")]) | |
220 | |
;; Element-size letter spliced into mnemonics, e.g. a<bh> in add<mode>3:
;; "b" for bytes, "h" for halfwords, empty for words.
221 (define_mode_attr bh [(QI "b") (V16QI "b") | |
222 (HI "h") (V8HI "h") | |
223 (SI "") (V4SI "")]) | |
224 | |
;; "d" for the DF-based modes; d6 gives "6" instead of the empty string
;; for the SF-based modes.
225 (define_mode_attr d [(SF "") (V4SF "") | |
226 (DF "d") (V2DF "d")]) | |
227 (define_mode_attr d6 [(SF "6") (V4SF "6") | |
228 (DF "d") (V2DF "d")]) | |
229 | |
;; Integer mode name paired with each float mode, in lower case (f2i)
;; and upper case (F2I).
230 (define_mode_attr f2i [(SF "si") (V4SF "v4si") | |
231 (DF "di") (V2DF "v2di")]) | |
232 (define_mode_attr F2I [(SF "SI") (V4SF "V4SI") | |
233 (DF "DI") (V2DF "V2DI")]) | |
234 | |
;; NOTE(review): DF maps to SI here while V2DF maps to V2DI, unlike F2I
;; above where DF maps to DI -- confirm this asymmetry is intended.
235 (define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")]) | |
236 | |
;; Constraint-style letters per mode: lower case in umask, upper case in
;; nmask. Their users are not visible in this part of the file.
237 (define_mode_attr umask [(HI "f") (V8HI "f") | |
238 (SI "g") (V4SI "g")]) | |
239 (define_mode_attr nmask [(HI "F") (V8HI "F") | |
240 (SI "G") (V4SI "G")]) | |
241 | |
242 ;; Used for carry and borrow instructions. | |
243 (define_mode_iterator CBOP [SI DI V4SI V2DI]) | |
244 | |
245 ;; Used in vec_set and vec_extract | |
246 (define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF]) | |
;; Element mode of each vector mode in V.
247 (define_mode_attr inner [(V16QI "QI") | |
248 (V8HI "HI") | |
249 (V4SI "SI") | |
250 (V2DI "DI") | |
251 (V4SF "SF") | |
252 (V2DF "DF")]) | |
;; Presumably the element size in bytes for each vector mode -- TODO
;; confirm against the vec_set/vec_extract patterns.
253 (define_mode_attr vmult [(V16QI "1") | |
254 (V8HI "2") | |
255 (V4SI "4") | |
256 (V2DI "8") | |
257 (V4SF "4") | |
258 (V2DF "8")]) | |
;; Per-mode offset constant; nonzero only for V16QI (13) and V8HI (14).
;; NOTE(review): its meaning is not visible here -- see the users.
259 (define_mode_attr voff [(V16QI "13") | |
260 (V8HI "14") | |
261 (V4SI "0") | |
262 (V2DI "0") | |
263 (V4SF "0") | |
264 (V2DF "0")]) | |
265 | |
266 | |
267 ;; mov | |
268 | |
;; Move expander for every mode in ALL. spu_expand_mov gets the first
;; chance: if it returns nonzero the expansion is complete (DONE);
;; otherwise the plain set pattern below is emitted.
269 (define_expand "mov<mode>" | |
270 [(set (match_operand:ALL 0 "spu_nonimm_operand" "=r,r,r,m") | |
271 (match_operand:ALL 1 "general_operand" "r,i,m,r"))] | |
272 "" | |
273 { | |
274 if (spu_expand_mov(operands, <MODE>mode)) | |
275 DONE; | |
276 }) | |
277 | |
;; Split a register <- immediate set. All of the work is done by
;; spu_split_immediate: a nonzero return ends the split with DONE, any
;; other return FAILs, so the high/lo_sum template is never emitted as
;; written.
278 (define_split | |
279 [(set (match_operand 0 "spu_reg_operand") | |
280 (match_operand 1 "immediate_operand"))] | |
281 | |
282 "" | |
283 [(set (match_dup 0) | |
284 (high (match_dup 1))) | |
285 (set (match_dup 0) | |
286 (lo_sum (match_dup 0) | |
287 (match_dup 1)))] | |
288 { | |
289 if (spu_split_immediate (operands)) | |
290 DONE; | |
291 FAIL; | |
292 }) | |
293 | |
;; Load an SI immediate (constraint "s") through the %pic() operator
;; with ila. Only enabled when flag_pic is set; the (use (const_int 0))
;; is part of the pattern that must be matched.
294 (define_insn "pic" | |
295 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
296 (match_operand:SI 1 "immediate_operand" "s")) | |
297 (use (const_int 0))] | |
298 "flag_pic" | |
299 "ila\t%0,%%pic(%1)") | |
300 | |
301 ;; Whenever a function generates the 'pic' pattern above we need to | |
302 ;; load the pic_offset_table register. | |
303 ;; GCC doesn't deal well with labels in the middle of a block so we | |
304 ;; hardcode the offsets in the asm here. | |
;; Two instructions are emitted (ila + brsl), hence length 8 and type
;; multi0. Both results are modelled as an unspec whose code is the
;; literal 0, the value define_constants assigns to UNSPEC_BLOCKAGE.
305 (define_insn "load_pic_offset" | |
306 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
307 (unspec:SI [(const_int 0)] 0)) | |
308 (set (match_operand:SI 1 "spu_reg_operand" "=r") | |
309 (unspec:SI [(const_int 0)] 0))] | |
310 "flag_pic" | |
311 "ila\t%1,.+8\;brsl\t%0,4" | |
312 [(set_attr "length" "8") | |
313 (set_attr "type" "multi0")]) | |
314 | |
315 | |
316 ;; move internal | |
317 | |
;; Move insn for the modes in MOV, accepted only when spu_valid_move
;; approves the operands. Six alternatives, in order: register copy
;; (ori with 0), three immediate forms (il, fsmbi, c*d off $sp), load
;; (lq) and store (stq); the "type" list follows the same order.
318 (define_insn "_mov<mode>" | |
319 [(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m") | |
320 (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))] | |
321 "spu_valid_move (operands)" | |
322 "@ | |
323 ori\t%0,%1,0 | |
324 il%s1\t%0,%S1 | |
325 fsmbi\t%0,%S1 | |
326 c%s1d\t%0,%S1($sp) | |
327 lq%p1\t%0,%1 | |
328 stq%p0\t%1,%0" | |
329 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) | |
330 | |
;; lo_sum of operand 1 and an immediate for SI/V4SI. Operand 1 is tied
;; to the output by the "0" constraint; emits iohl with the @l part of
;; operand 2.
331 (define_insn "low_<mode>" | |
332 [(set (match_operand:VSI 0 "spu_reg_operand" "=r") | |
333 (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0") | |
334 (match_operand:VSI 2 "immediate_operand" "i")))] | |
335 "" | |
336 "iohl\t%0,%2@l") | |
337 | |
;; DImode move insn. Same six alternatives as _mov<mode>, but the
;; immediate alternatives use constraints a/f/k and the %d/%D operand
;; modifiers.
338 (define_insn "_movdi" | |
339 [(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m") | |
340 (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))] | |
341 "spu_valid_move (operands)" | |
342 "@ | |
343 ori\t%0,%1,0 | |
344 il%d1\t%0,%D1 | |
345 fsmbi\t%0,%D1 | |
346 c%d1d\t%0,%D1($sp) | |
347 lq%p1\t%0,%1 | |
348 stq%p0\t%1,%0" | |
349 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) | |
350 | |
;; TImode move insn. Same six alternatives as _mov<mode>, but the
;; immediate alternatives use constraints U/f/l and the %t/%T operand
;; modifiers.
351 (define_insn "_movti" | |
352 [(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m") | |
353 (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))] | |
354 "spu_valid_move (operands)" | |
355 "@ | |
356 ori\t%0,%1,0 | |
357 il%t1\t%0,%T1 | |
358 fsmbi\t%0,%T1 | |
359 c%t1d\t%0,%T1($sp) | |
360 lq%p1\t%0,%1 | |
361 stq%p0\t%1,%0" | |
362 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) | |
363 | |
;; Placeholder ("#") for a memory-to-register load whose two operands
;; have the same mode. The split condition is empty and the replacement
;; is generated entirely by spu_split_load. Operands 2 (TI) and 3 (SI)
;; are early-clobber scratch registers.
364 (define_insn_and_split "load" | |
365 [(set (match_operand 0 "spu_reg_operand" "=r") | |
366 (match_operand 1 "memory_operand" "m")) | |
367 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r")) | |
368 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))] | |
369 "GET_MODE(operands[0]) == GET_MODE(operands[1])" | |
370 "#" | |
371 "" | |
372 [(set (match_dup 0) | |
373 (match_dup 1))] | |
374 { spu_split_load(operands); DONE; }) | |
375 | |
;; Placeholder ("#") for a register-to-memory store whose two operands
;; have the same mode. The split condition is empty and the replacement
;; is generated entirely by spu_split_store. Operands 2 and 3 are TI
;; early-clobber scratch registers.
376 (define_insn_and_split "store" | |
377 [(set (match_operand 0 "memory_operand" "=m") | |
378 (match_operand 1 "spu_reg_operand" "r")) | |
379 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r")) | |
380 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))] | |
381 "GET_MODE(operands[0]) == GET_MODE(operands[1])" | |
382 "#" | |
383 "" | |
384 [(set (match_dup 0) | |
385 (match_dup 1))] | |
386 { spu_split_store(operands); DONE; }) | |
387 | |
388 ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d | |
389 | |
;; If gen_cpat_const can build a constant from the operands, move that
;; constant into operand 0 and finish; otherwise fall through and emit
;; the UNSPEC_CPAT pattern below.
390 (define_expand "cpat" | |
391 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
392 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r") | |
393 (match_operand:SI 2 "spu_nonmem_operand" "r,n") | |
394 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))] | |
395 "" | |
396 { | |
397 rtx x = gen_cpat_const (operands); | |
398 if (x) | |
399 { | |
400 emit_move_insn (operands[0], x); | |
401 DONE; | |
402 } | |
403 }) | |
404 | |
;; Insn for the UNSPEC_CPAT form. With operand 2 in a register the
;; c%M3x template is used; with a constant operand 2 the c%M3d template
;; is used.
405 (define_insn "_cpat" | |
406 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
407 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r") | |
408 (match_operand:SI 2 "spu_nonmem_operand" "r,n") | |
409 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))] | |
410 "" | |
411 "@ | |
412 c%M3x\t%0,%1,%2 | |
413 c%M3d\t%0,%C2(%1)" | |
414 [(set_attr "type" "shuf")]) | |
415 | |
;; Replace an UNSPEC_CPAT whose operands 2 and 3 are immediates by a
;; plain move of the constant computed by gen_cpat_const (stored in
;; operand 4). The split FAILs when no constant can be built.
416 (define_split | |
417 [(set (match_operand:TI 0 "spu_reg_operand") | |
418 (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand") | |
419 (match_operand:SI 2 "immediate_operand") | |
420 (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))] | |
421 "" | |
422 [(set (match_dup:TI 0) | |
423 (match_dup:TI 4))] | |
424 { | |
425 operands[4] = gen_cpat_const (operands); | |
426 if (!operands[4]) | |
427 FAIL; | |
428 }) | |
429 | |
430 ;; extend | |
431 | |
;; Sign-extend QI to HI; emits xsbh.
432 (define_insn "extendqihi2" | |
433 [(set (match_operand:HI 0 "spu_reg_operand" "=r") | |
434 (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))] | |
435 "" | |
436 "xsbh\t%0,%1") | |
437 | |
;; Sign-extend HI to SI; emits xshw.
438 (define_insn "extendhisi2" | |
439 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
440 (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))] | |
441 "" | |
442 "xshw\t%0,%1") | |
443 | |
;; Sign-extend SI to DI in two steps: zero-extend operand 1 into a fresh
;; DI temporary (operand 2), then sign-extend element 1 of that
;; temporary viewed as V2SI (operand 3, built with spu_gen_subreg).
444 (define_expand "extendsidi2" | |
445 [(set (match_dup:DI 2) | |
446 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" ""))) | |
447 (set (match_operand:DI 0 "spu_reg_operand" "") | |
448 (sign_extend:DI (vec_select:SI (match_dup:V2SI 3) | |
449 (parallel [(const_int 1)]))))] | |
450 "" | |
451 { | |
452 operands[2] = gen_reg_rtx (DImode); | |
453 operands[3] = spu_gen_subreg (V2SImode, operands[2]); | |
454 }) | |
455 | |
;; Sign-extend element 1 of a V2SI register to DI; emits xswd.
456 (define_insn "xswd" | |
457 [(set (match_operand:DI 0 "spu_reg_operand" "=r") | |
458 (sign_extend:DI | |
459 (vec_select:SI | |
460 (match_operand:V2SI 1 "spu_reg_operand" "r") | |
461 (parallel [(const_int 1) ]))))] | |
462 "" | |
463 "xswd\t%0,%1") | |
464 | |
;; Sign-extend QI to TI; the expansion is delegated entirely to
;; spu_expand_sign_extend.
465 (define_expand "extendqiti2" | |
466 [(set (match_operand:TI 0 "register_operand" "") | |
467 (sign_extend:TI (match_operand:QI 1 "register_operand" "")))] | |
468 "" | |
469 "spu_expand_sign_extend(operands); | |
470 DONE;") | |
471 | |
;; Sign-extend HI to TI; the expansion is delegated entirely to
;; spu_expand_sign_extend.
472 (define_expand "extendhiti2" | |
473 [(set (match_operand:TI 0 "register_operand" "") | |
474 (sign_extend:TI (match_operand:HI 1 "register_operand" "")))] | |
475 "" | |
476 "spu_expand_sign_extend(operands); | |
477 DONE;") | |
478 | |
;; Sign-extend SI to TI; the expansion is delegated entirely to
;; spu_expand_sign_extend.
479 (define_expand "extendsiti2" | |
480 [(set (match_operand:TI 0 "register_operand" "") | |
481 (sign_extend:TI (match_operand:SI 1 "register_operand" "")))] | |
482 "" | |
483 "spu_expand_sign_extend(operands); | |
484 DONE;") | |
485 | |
;; Sign-extend DI to TI; the expansion is delegated entirely to
;; spu_expand_sign_extend.
486 (define_expand "extendditi2" | |
487 [(set (match_operand:TI 0 "register_operand" "") | |
488 (sign_extend:TI (match_operand:DI 1 "register_operand" "")))] | |
489 "" | |
490 "spu_expand_sign_extend(operands); | |
491 DONE;") | |
492 | |
493 | |
494 ;; zero_extend | |
495 | |
;; Zero-extend QI to HI by masking with andi 0x00ff.
496 (define_insn "zero_extendqihi2" | |
497 [(set (match_operand:HI 0 "spu_reg_operand" "=r") | |
498 (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))] | |
499 "" | |
500 "andi\t%0,%1,0x00ff") | |
501 | |
;; Zero-extend QI to SI by masking with andi 0x00ff.
502 (define_insn "zero_extendqisi2" | |
503 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
504 (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))] | |
505 "" | |
506 "andi\t%0,%1,0x00ff") | |
507 | |
;; Zero-extend HI to SI: load 0xffff into a new SI pseudo and AND it
;; (andsi3) with operand 1 viewed as SImode.
;; NOTE(review): the match_scratch operand 2 is never referenced by the
;; expansion code, which always ends in DONE -- confirm it is still
;; needed.
508 (define_expand "zero_extendhisi2" | |
509 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
510 (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))) | |
511 (clobber (match_scratch:SI 2 "=&r"))] | |
512 "" | |
513 { | |
514 rtx mask = gen_reg_rtx (SImode); | |
515 rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0); | |
516 emit_move_insn (mask, GEN_INT (0xffff)); | |
517 emit_insn (gen_andsi3(operands[0], op1, mask)); | |
518 DONE; | |
519 }) | |
520 | |
;; Zero-extend SI to DI; emits rotqmbyi with a count of -4 (type shuf).
521 (define_insn "zero_extendsidi2" | |
522 [(set (match_operand:DI 0 "spu_reg_operand" "=r") | |
523 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))] | |
524 "" | |
525 "rotqmbyi\t%0,%1,-4" | |
526 [(set_attr "type" "shuf")]) | |
527 | |
;; Zero-extend SI to TI; emits rotqmbyi with a count of -12 (type shuf).
528 (define_insn "zero_extendsiti2" | |
529 [(set (match_operand:TI 0 "spu_reg_operand" "=r") | |
530 (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))] | |
531 "" | |
532 "rotqmbyi\t%0,%1,-12" | |
533 [(set_attr "type" "shuf")]) | |
534 | |
;; Zero-extend DI to TI; emits rotqmbyi with a count of -8 (type shuf).
535 (define_insn "zero_extendditi2" | |
536 [(set (match_operand:TI 0 "spu_reg_operand" "=r") | |
537 (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))] | |
538 "" | |
539 "rotqmbyi\t%0,%1,-8" | |
540 [(set_attr "type" "shuf")]) | |
541 | |
542 | |
543 ;; trunc | |
544 | |
;; Truncate DI to QI; emits shlqbyi with a count of 4 (type shuf).
545 (define_insn "truncdiqi2" | |
546 [(set (match_operand:QI 0 "spu_reg_operand" "=r") | |
547 (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))] | |
548 "" | |
549 "shlqbyi\t%0,%1,4" | |
550 [(set_attr "type" "shuf")]) | |
551 | |
;; Truncate DI to HI; emits shlqbyi with a count of 4 (type shuf).
552 (define_insn "truncdihi2" | |
553 [(set (match_operand:HI 0 "spu_reg_operand" "=r") | |
554 (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))] | |
555 "" | |
556 "shlqbyi\t%0,%1,4" | |
557 [(set_attr "type" "shuf")]) | |
558 | |
;; Truncate DI to SI; emits shlqbyi with a count of 4 (type shuf).
559 (define_insn "truncdisi2" | |
560 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
561 (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))] | |
562 "" | |
563 "shlqbyi\t%0,%1,4" | |
564 [(set_attr "type" "shuf")]) | |
565 | |
;; Truncate TI to QI; emits shlqbyi with a count of 12 (type shuf).
566 (define_insn "trunctiqi2" | |
567 [(set (match_operand:QI 0 "spu_reg_operand" "=r") | |
568 (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))] | |
569 "" | |
570 "shlqbyi\t%0,%1,12" | |
571 [(set_attr "type" "shuf")]) | |
572 | |
;; Truncate TI to HI; emits shlqbyi with a count of 12 (type shuf).
573 (define_insn "trunctihi2" | |
574 [(set (match_operand:HI 0 "spu_reg_operand" "=r") | |
575 (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))] | |
576 "" | |
577 "shlqbyi\t%0,%1,12" | |
578 [(set_attr "type" "shuf")]) | |
579 | |
;; Truncate TI to SI; emits shlqbyi with a count of 12 (type shuf).
580 (define_insn "trunctisi2" | |
581 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
582 (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))] | |
583 "" | |
584 "shlqbyi\t%0,%1,12" | |
585 [(set_attr "type" "shuf")]) | |
586 | |
;; Truncate TI to DI; emits shlqbyi with a count of 8 (type shuf).
587 (define_insn "trunctidi2" | |
588 [(set (match_operand:DI 0 "spu_reg_operand" "=r") | |
589 (truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))] | |
590 "" | |
591 "shlqbyi\t%0,%1,8" | |
592 [(set_attr "type" "shuf")]) | |
593 | |
594 | |
595 ;; float conversions | |
596 | |
;; Signed SI to SF; emits csflt with a literal 0 as its last operand
;; (type fp7).
597 (define_insn "floatsisf2" | |
598 [(set (match_operand:SF 0 "spu_reg_operand" "=r") | |
599 (float:SF (match_operand:SI 1 "spu_reg_operand" "r")))] | |
600 "" | |
601 "csflt\t%0,%1,0" | |
602 [(set_attr "type" "fp7")]) | |
603 | |
;; Signed V4SI to V4SF; same csflt template as the scalar floatsisf2.
604 (define_insn "floatv4siv4sf2" | |
605 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
606 (float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))] | |
607 "" | |
608 "csflt\t%0,%1,0" | |
609 [(set_attr "type" "fp7")]) | |
610 | |
;; SF to signed SI (fix); emits cflts with a literal 0 as its last
;; operand (type fp7).
611 (define_insn "fix_truncsfsi2" | |
612 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
613 (fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))] | |
614 "" | |
615 "cflts\t%0,%1,0" | |
616 [(set_attr "type" "fp7")]) | |
617 | |
;; V4SF to signed V4SI (fix); same cflts template as fix_truncsfsi2.
618 (define_insn "fix_truncv4sfv4si2" | |
619 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
620 (fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))] | |
621 "" | |
622 "cflts\t%0,%1,0" | |
623 [(set_attr "type" "fp7")]) | |
624 | |
;; Unsigned SI to SF; emits cuflt with a literal 0 as its last operand
;; (type fp7).
625 (define_insn "floatunssisf2" | |
626 [(set (match_operand:SF 0 "spu_reg_operand" "=r") | |
627 (unsigned_float:SF (match_operand:SI 1 "spu_reg_operand" "r")))] | |
628 "" | |
629 "cuflt\t%0,%1,0" | |
630 [(set_attr "type" "fp7")]) | |
631 | |
;; Unsigned V4SI to V4SF; same cuflt template as floatunssisf2.
632 (define_insn "floatunsv4siv4sf2" | |
633 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
634 (unsigned_float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))] | |
635 "" | |
636 "cuflt\t%0,%1,0" | |
637 [(set_attr "type" "fp7")]) | |
638 | |
;; SF to unsigned SI (unsigned_fix); emits cfltu with a literal 0 as
;; its last operand (type fp7).
639 (define_insn "fixuns_truncsfsi2" | |
640 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
641 (unsigned_fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))] | |
642 "" | |
643 "cfltu\t%0,%1,0" | |
644 [(set_attr "type" "fp7")]) | |
645 | |
;; V4SF to unsigned V4SI (unsigned_fix); same cfltu template as
;; fixuns_truncsfsi2.
646 (define_insn "fixuns_truncv4sfv4si2" | |
647 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
648 (unsigned_fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))] | |
649 "" | |
650 "cfltu\t%0,%1,0" | |
651 [(set_attr "type" "fp7")]) | |
652 | |
;; SF to DF. The operation is represented as UNSPEC_FLOAT_EXTEND rather
;; than a float_extend rtx; emits fesd (type fpd).
653 (define_insn "extendsfdf2" | |
654 [(set (match_operand:DF 0 "spu_reg_operand" "=r") | |
655 (unspec:DF [(match_operand:SF 1 "spu_reg_operand" "r")] | |
656 UNSPEC_FLOAT_EXTEND))] | |
657 "" | |
658 "fesd\t%0,%1" | |
659 [(set_attr "type" "fpd")]) | |
660 | |
;; DF to SF. The operation is represented as UNSPEC_FLOAT_TRUNCATE
;; rather than a float_truncate rtx; emits frds (type fpd).
661 (define_insn "truncdfsf2" | |
662 [(set (match_operand:SF 0 "spu_reg_operand" "=r") | |
663 (unspec:SF [(match_operand:DF 1 "spu_reg_operand" "r")] | |
664 UNSPEC_FLOAT_TRUNCATE))] | |
665 "" | |
666 "frds\t%0,%1" | |
667 [(set_attr "type" "fpd")]) | |
668 | |
;; Signed DI to SF. Selects between operand 1 and its negation (selb
;; under a mask built by cgt_di_m1 and extend_compare), converts the
;; selected value with floatunsdisf2, then ORs `setneg' (c0
;; and-complemented with isneg, c0 being -0x80000000) into the bit
;; pattern of the result through SImode subregs.
;; NOTE(review): presumably this converts |x| and then restores the sign
;; bit -- confirm against the cgt_di_m1 and selb patterns.
669 (define_expand "floatdisf2" | |
670 [(set (match_operand:SF 0 "register_operand" "") | |
671 (float:SF (match_operand:DI 1 "register_operand" "")))] | |
672 "" | |
673 { | |
674 rtx c0 = gen_reg_rtx (SImode); | |
675 rtx r0 = gen_reg_rtx (DImode); | |
676 rtx r1 = gen_reg_rtx (SFmode); | |
677 rtx r2 = gen_reg_rtx (SImode); | |
678 rtx setneg = gen_reg_rtx (SImode); | |
679 rtx isneg = gen_reg_rtx (SImode); | |
680 rtx neg = gen_reg_rtx (DImode); | |
681 rtx mask = gen_reg_rtx (DImode); | |
682 | |
683 emit_move_insn (c0, GEN_INT (-0x80000000ll)); | |
684 | |
685 emit_insn (gen_negdi2 (neg, operands[1])); | |
686 emit_insn (gen_cgt_di_m1 (isneg, operands[1])); | |
687 emit_insn (gen_extend_compare (mask, isneg)); | |
688 emit_insn (gen_selb (r0, neg, operands[1], mask)); | |
689 emit_insn (gen_andc_si (setneg, c0, isneg)); | |
690 | |
691 emit_insn (gen_floatunsdisf2 (r1, r0)); | |
692 | |
693 emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg)); | |
694 emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0)); | |
695 DONE; | |
696 }) | |
697 | |
;; Unsigned DI to SF. Kept as a single "#" insn with three SF scratch
;; registers and split once reload has completed, because the split
;; code re-views the operands in other modes by hard register number.
;; Steps: convert operand 1 viewed as V4SI with floatunsv4siv4sf2 into
;; scratch 2, shift the TI view of scratch 2 left by 4 bytes into
;; scratch 3, load the constant built by real_2expN with exponent 32
;; into scratch 4, then combine with fma_sf (operands 2, 4, 3).
698 (define_insn_and_split "floatunsdisf2" | |
699 [(set (match_operand:SF 0 "register_operand" "=r") | |
700 (unsigned_float:SF (match_operand:DI 1 "register_operand" "r"))) | |
701 (clobber (match_scratch:SF 2 "=r")) | |
702 (clobber (match_scratch:SF 3 "=r")) | |
703 (clobber (match_scratch:SF 4 "=r"))] | |
704 "" | |
705 "#" | |
706 "reload_completed" | |
707 [(set (match_dup:SF 0) | |
708 (unsigned_float:SF (match_dup:DI 1)))] | |
709 { | |
710 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1])); | |
711 rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2])); | |
712 rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2])); | |
713 rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3])); | |
714 | |
715 REAL_VALUE_TYPE scale; | |
716 real_2expN (&scale, 32, SFmode); | |
717 | |
718 emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si)); | |
719 emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4))); | |
720 | |
721 emit_move_insn (operands[4], | |
722 CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode)); | |
723 emit_insn (gen_fma_sf (operands[0], | |
724 operands[2], operands[4], operands[3])); | |
725 DONE; | |
726 }) | |
727 | |
728 ;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000 | |
;; The bias is applied with xorsi3 against -0x80000000, the biased value
;; is converted with floatunssidf2, and the DF constant "2147483648" is
;; then subtracted with subdf3.
729 (define_expand "floatsidf2" | |
730 [(set (match_operand:DF 0 "register_operand" "") | |
731 (float:DF (match_operand:SI 1 "register_operand" "")))] | |
732 "" | |
733 { | |
734 rtx c0 = gen_reg_rtx (SImode); | |
735 rtx c1 = gen_reg_rtx (DFmode); | |
736 rtx r0 = gen_reg_rtx (SImode); | |
737 rtx r1 = gen_reg_rtx (DFmode); | |
738 | |
739 emit_move_insn (c0, GEN_INT (-0x80000000ll)); | |
740 emit_move_insn (c1, spu_float_const ("2147483648", DFmode)); | |
741 emit_insn (gen_xorsi3 (r0, operands[1], c0)); | |
742 emit_insn (gen_floatunssidf2 (r1, r0)); | |
743 emit_insn (gen_subdf3 (operands[0], r1, c1)); | |
744 DONE; | |
745 }) | |
746 | |
;; Unsigned SI to DF. With optimize_size set, the conversion is emitted
;; as a library call (the ufloat_optab libfunc for DF <- SI) wrapped in
;; a libcall block. Otherwise the V16QI byte-pattern constant c0 is
;; loaded into a register and handed to floatunssidf2_internal, which
;; uses it as the control operand of its shufb step.
747 (define_expand "floatunssidf2" | |
748 [(set (match_operand:DF 0 "register_operand" "=r") | |
749 (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))] | |
750 "" | |
751 "{ | |
752 rtx value, insns; | |
753 rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080, | |
754 0x06071415, 0x16178080); | |
755 rtx r0 = gen_reg_rtx (V16QImode); | |
756 | |
757 if (optimize_size) | |
758 { | |
759 start_sequence (); | |
760 value = | |
761 emit_library_call_value (convert_optab_libfunc (ufloat_optab, | |
762 DFmode, SImode), | |
763 NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], SImode); | |
764 insns = get_insns (); | |
765 end_sequence (); | |
766 emit_libcall_block (insns, operands[0], value, | |
767 gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1])); | |
768 } | |
769 else | |
770 { | |
771 emit_move_insn (r0, c0); | |
772 emit_insn (gen_floatunssidf2_internal (operands[0], operands[1], r0)); | |
773 } | |
774 DONE; | |
775 }") | |
776 | |
;; Unsigned SI to DF, kept as one insn until reload has completed and
;; then split. Operand 2 is the shufb control pattern supplied by the
;; floatunssidf2 expander; operands 3-6 are early-clobber V4SI scratch
;; registers. The asm template and the split code spell out the same
;; nine steps: clz, il 1023+31, shl, ceqi 32, sf, a (doubling), andc,
;; shufb, shlqbii 4.
;; The "length" is 36: nine instructions at the 4-byte default length.
777 (define_insn_and_split "floatunssidf2_internal" | |
778 [(set (match_operand:DF 0 "register_operand" "=r") | |
779 (unsigned_float:DF (match_operand:SI 1 "register_operand" "r"))) | |
780 (use (match_operand:V16QI 2 "register_operand" "r")) | |
781 (clobber (match_scratch:V4SI 3 "=&r")) | |
782 (clobber (match_scratch:V4SI 4 "=&r")) | |
783 (clobber (match_scratch:V4SI 5 "=&r")) | |
784 (clobber (match_scratch:V4SI 6 "=&r"))] | |
785 "" | |
786 "clz\t%3,%1\;il\t%6,1023+31\;shl\t%4,%1,%3\;ceqi\t%5,%3,32\;sf\t%6,%3,%6\;a\t%4,%4,%4\;andc\t%6,%6,%5\;shufb\t%6,%6,%4,%2\;shlqbii\t%0,%6,4" | |
787 "reload_completed" | |
788 [(set (match_dup:DF 0) | |
789 (unsigned_float:DF (match_dup:SI 1)))] | |
790 "{ | |
791 rtx *ops = operands; | |
792 rtx op1_v4si = gen_rtx_REG(V4SImode, REGNO(ops[1])); | |
793 rtx op0_ti = gen_rtx_REG (TImode, REGNO (ops[0])); | |
794 rtx op2_ti = gen_rtx_REG (TImode, REGNO (ops[2])); | |
795 rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6])); | |
796 emit_insn (gen_clzv4si2 (ops[3],op1_v4si)); | |
797 emit_move_insn (ops[6], spu_const (V4SImode, 1023+31)); | |
798 emit_insn (gen_vashlv4si3 (ops[4],op1_v4si,ops[3])); | |
799 emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32))); | |
800 emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3])); | |
801 emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4])); | |
802 emit_insn (gen_andc_v4si (ops[6],ops[6],ops[5])); | |
803 emit_insn (gen_shufb (ops[6],ops[6],ops[4],op2_ti)); | |
804 emit_insn (gen_shlqbi_ti (op0_ti,op6_ti,GEN_INT(4))); | |
805 DONE; | |
806 }" | |
807 [(set_attr "length" "36")]) | |
808 | |
;; Signed DI to DF. Selects between operand 1 and its negation (selb
;; under a mask built by cgt_di_m1 and extend_compare), converts the
;; selected value with floatunsdidf2, then ORs `setneg' (c0
;; and-complemented with the DI mask, c0 being 0x8000000000000000) into
;; the bit pattern of the result through DImode subregs.
;; NOTE(review): presumably this converts |x| and then restores the sign
;; bit -- confirm against the cgt_di_m1 and selb patterns.
809 (define_expand "floatdidf2" | |
810 [(set (match_operand:DF 0 "register_operand" "") | |
811 (float:DF (match_operand:DI 1 "register_operand" "")))] | |
812 "" | |
813 { | |
814 rtx c0 = gen_reg_rtx (DImode); | |
815 rtx r0 = gen_reg_rtx (DImode); | |
816 rtx r1 = gen_reg_rtx (DFmode); | |
817 rtx r2 = gen_reg_rtx (DImode); | |
818 rtx setneg = gen_reg_rtx (DImode); | |
819 rtx isneg = gen_reg_rtx (SImode); | |
820 rtx neg = gen_reg_rtx (DImode); | |
821 rtx mask = gen_reg_rtx (DImode); | |
822 | |
823 emit_move_insn (c0, GEN_INT (0x8000000000000000ull)); | |
824 | |
825 emit_insn (gen_negdi2 (neg, operands[1])); | |
826 emit_insn (gen_cgt_di_m1 (isneg, operands[1])); | |
827 emit_insn (gen_extend_compare (mask, isneg)); | |
828 emit_insn (gen_selb (r0, neg, operands[1], mask)); | |
829 emit_insn (gen_andc_di (setneg, c0, mask)); | |
830 | |
831 emit_insn (gen_floatunsdidf2 (r1, r0)); | |
832 | |
833 emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg)); | |
834 emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0)); | |
835 DONE; | |
836 }) | |
837 | |
;; Unsigned DI to DF. With optimize_size set, the conversion is emitted
;; as a library call (the ufloat_optab libfunc for DF <- DI) wrapped in
;; a libcall block. Otherwise two constants are loaded into registers
;; and handed to floatunsdidf2_internal: c0, the V16QI pattern used by
;; its shufb step, and c1, the V4SI vector {1023+63, 1023+31, 0, 0}
;; used by its subtract step.
838 (define_expand "floatunsdidf2" | |
839 [(set (match_operand:DF 0 "register_operand" "=r") | |
840 (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))] | |
841 "" | |
842 "{ | |
843 rtx value, insns; | |
844 rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080, | |
845 0x06071415, 0x16178080); | |
846 rtx c1 = spu_const_from_ints (V4SImode, 1023+63, 1023+31, 0, 0); | |
847 rtx r0 = gen_reg_rtx (V16QImode); | |
848 rtx r1 = gen_reg_rtx (V4SImode); | |
849 | |
850 if (optimize_size) | |
851 { | |
852 start_sequence (); | |
853 value = | |
854 emit_library_call_value (convert_optab_libfunc (ufloat_optab, | |
855 DFmode, DImode), | |
856 NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], DImode); | |
857 insns = get_insns (); | |
858 end_sequence (); | |
859 emit_libcall_block (insns, operands[0], value, | |
860 gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1])); | |
861 } | |
862 else | |
863 { | |
864 emit_move_insn (r1, c1); | |
865 emit_move_insn (r0, c0); | |
866 emit_insn (gen_floatunsdidf2_internal (operands[0], operands[1], r0, r1)); | |
867 } | |
868 DONE; | |
869 }") | |
870 | |
;; Unsigned DI to DF, kept as one insn until reload has completed and
;; then split. Operand 2 is the shufb control pattern and operand 3 the
;; V4SI constant, both supplied by the floatunsdidf2 expander; operands
;; 4-6 are early-clobber V4SI scratch registers. The asm template and
;; the split code spell out the same ten steps (clz, shl, ceqi 32, sf,
;; a, andc, shufb, shlqbii 4, shlqbyi 8, dfa), hence length 40.
;; The replacement pattern refers to operands 0 and 1 with match_dup,
;; as floatunssidf2_internal does; it is never emitted as written
;; because the split code always ends in DONE.
871 (define_insn_and_split "floatunsdidf2_internal" | |
872 [(set (match_operand:DF 0 "register_operand" "=r") | |
873 (unsigned_float:DF (match_operand:DI 1 "register_operand" "r"))) | |
874 (use (match_operand:V16QI 2 "register_operand" "r")) | |
875 (use (match_operand:V4SI 3 "register_operand" "r")) | |
876 (clobber (match_scratch:V4SI 4 "=&r")) | |
877 (clobber (match_scratch:V4SI 5 "=&r")) | |
878 (clobber (match_scratch:V4SI 6 "=&r"))] | |
879 "" | |
880 "clz\t%4,%1\;shl\t%5,%1,%4\;ceqi\t%6,%4,32\;sf\t%4,%4,%3\;a\t%5,%5,%5\;andc\t%4,%4,%6\;shufb\t%4,%4,%5,%2\;shlqbii\t%4,%4,4\;shlqbyi\t%5,%4,8\;dfa\t%0,%4,%5" | |
881 "reload_completed" | |
882 [(set (match_dup:DF 0) | |
883 (unsigned_float:DF (match_dup:DI 1)))] | |
884 "{ | |
885 rtx *ops = operands; | |
886 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO(ops[1])); | |
887 rtx op2_ti = gen_rtx_REG (TImode, REGNO(ops[2])); | |
888 rtx op4_ti = gen_rtx_REG (TImode, REGNO(ops[4])); | |
889 rtx op5_ti = gen_rtx_REG (TImode, REGNO(ops[5])); | |
890 rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4])); | |
891 rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5])); | |
892 emit_insn (gen_clzv4si2 (ops[4],op1_v4si)); | |
893 emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4])); | |
894 emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32))); | |
895 emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4])); | |
896 emit_insn (gen_addv4si3 (ops[5],ops[5],ops[5])); | |
897 emit_insn (gen_andc_v4si (ops[4],ops[4],ops[6])); | |
898 emit_insn (gen_shufb (ops[4],ops[4],ops[5],op2_ti)); | |
899 emit_insn (gen_shlqbi_ti (op4_ti,op4_ti,GEN_INT(4))); | |
900 emit_insn (gen_shlqby_ti (op5_ti,op4_ti,GEN_INT(8))); | |
901 emit_insn (gen_adddf3 (ops[0],op4_df,op5_df)); | |
902 DONE; | |
903 }" | |
904 [(set_attr "length" "40")]) | |
905 | |
906 | |
907 ;; add | |
908 | |
;; V16QI addition built from two V8HI additions on halfword views of the
;; operands. `hi_char' adds the lhs to the rhs masked with 0xff00;
;; `lo_char' adds the unmasked operands. selb then merges the two sums
;; under a 0x00ff-per-halfword mask into the halfword view of operand 0.
909 (define_expand "addv16qi3" | |
910 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
911 (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r") | |
912 (match_operand:V16QI 2 "spu_reg_operand" "r")))] | |
913 "" | |
914 "{ | |
915 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0); | |
916 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0); | |
917 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0); | |
918 rtx rhs_and = gen_reg_rtx (V8HImode); | |
919 rtx hi_char = gen_reg_rtx (V8HImode); | |
920 rtx lo_char = gen_reg_rtx (V8HImode); | |
921 rtx mask = gen_reg_rtx (V8HImode); | |
922 | |
923 emit_move_insn (mask, spu_const (V8HImode, 0x00ff)); | |
924 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00))); | |
925 emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and)); | |
926 emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short)); | |
927 emit_insn (gen_selb (res_short, hi_char, lo_char, mask)); | |
928 DONE; | |
929 }") | |
930 | |
;; Add for the VHSI modes.  Alternative 0 adds two registers; alternative 1
;; accepts an operand 2 satisfying constraint B and uses the mnemonic with
;; an "i" suffix.  <bh> is substituted into the mnemonic per mode.
931 (define_insn "add<mode>3" | |
932 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
933 (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
934 (match_operand:VHSI 2 "spu_arith_operand" "r,B")))] | |
935 "" | |
936 "@ | |
937 a<bh>\t%0,%1,%2 | |
938 a<bh>i\t%0,%1,%2") | |
939 | |
;; Expand an add for the VDI modes into three unspec sets:
;;   temp 3 = UNSPEC_CG (operand 1, operand 2)
;;   temp 5 = UNSPEC_SHUFB (temp 3, temp 3, TImode control word in temp 4)
;;   operand 0 = UNSPEC_ADDX (operand 1, operand 2, temp 5)
;; The preparation code allocates the three temporaries and loads the
;; 16-byte shuffle pattern into temp 4.
;; NOTE(review): presumably the shuffle moves the carry generated from each
;; low word into the position of the corresponding high word -- confirm
;; against the shufb control-byte encoding.
940 (define_expand "add<mode>3" | |
941 [(set (match_dup:VDI 3) | |
942 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "") | |
943 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG)) | |
944 (set (match_dup:VDI 5) | |
945 (unspec:VDI [(match_dup 3) | |
946 (match_dup 3) | |
947 (match_dup:TI 4)] UNSPEC_SHUFB)) | |
948 (set (match_operand:VDI 0 "spu_reg_operand" "") | |
949 (unspec:VDI [(match_dup 1) | |
950 (match_dup 2) | |
951 (match_dup 5)] UNSPEC_ADDX))] | |
952 "" | |
953 { | |
954 unsigned char pat[16] = { | |
955 0x04, 0x05, 0x06, 0x07, | |
956 0x80, 0x80, 0x80, 0x80, | |
957 0x0c, 0x0d, 0x0e, 0x0f, | |
958 0x80, 0x80, 0x80, 0x80 | |
959 }; | |
960 operands[3] = gen_reg_rtx (<MODE>mode); | |
961 operands[4] = gen_reg_rtx (TImode); | |
962 operands[5] = gen_reg_rtx (<MODE>mode); | |
963 emit_move_insn (operands[4], array_to_constant (TImode, pat)); | |
964 }) | |
965 | |
;; "cg" instruction for the CBOP modes: operand 0 is set to UNSPEC_CG of
;; operands 1 and 2.
;; NOTE(review): operands 1 and 2 are matched without a mode and the insn
;; condition is the C expression `operands' rather than an empty string;
;; presumably this is deliberate so the pattern accepts inputs of any
;; mode -- confirm before changing either.
966 (define_insn "cg_<mode>" | |
967 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
968 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
969 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))] | |
970 "operands" | |
971 "cg\t%0,%1,%2") | |
972 | |
;; "cgx" instruction for the CBOP modes: operand 0 is set to UNSPEC_CGX of
;; operands 1, 2 and 3.  Operand 3 is tied to operand 0 (constraint "0"),
;; so it does not appear in the output template.
;; NOTE(review): as with cg_<mode>, the inputs are modeless and the
;; condition is `operands'; presumably deliberate -- confirm.
973 (define_insn "cgx_<mode>" | |
974 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
975 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
976 (match_operand 2 "spu_reg_operand" "r") | |
977 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))] | |
978 "operands" | |
979 "cgx\t%0,%1,%2") | |
980 | |
;; "addx" instruction for the CBOP modes: operand 0 is set to UNSPEC_ADDX
;; of operands 1, 2 and 3.  Operand 3 is tied to operand 0 (constraint "0"),
;; so it does not appear in the output template.
;; NOTE(review): as with cg_<mode>, the inputs are modeless and the
;; condition is `operands'; presumably deliberate -- confirm.
981 (define_insn "addx_<mode>" | |
982 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
983 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
984 (match_operand 2 "spu_reg_operand" "r") | |
985 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))] | |
986 "operands" | |
987 "addx\t%0,%1,%2") | |
988 | |
989 | |
990 ;; This is not the most efficient implementation of addti3. | |
991 ;; We include this here because 1) the compiler needs it to be | |
992 ;; defined as the word size is 128-bit and 2) sometimes gcc | |
993 ;; substitutes an add for a constant left-shift. 2) is unlikely | |
994 ;; because we also give addti3 a high cost. In case gcc does | |
995 ;; generate TImode add, here is the code to do it. | |
996 ;; operand 2 is a nonmemory because the compiler requires it. | |
;; The template is a fixed sequence of seven instructions (length 28):
;; one cg and two cgx into scratch 3, each followed by a shlqbyi by 4, and
;; a final addx into operand 0.  Operand 0 is early-clobber because it is
;; written by the last shlqbyi before the final addx reads operands 1 and 2.
997 (define_insn "addti3" | |
998 [(set (match_operand:TI 0 "spu_reg_operand" "=&r") | |
999 (plus:TI (match_operand:TI 1 "spu_reg_operand" "r") | |
1000 (match_operand:TI 2 "spu_nonmem_operand" "r"))) | |
1001 (clobber (match_scratch:TI 3 "=&r"))] | |
1002 "" | |
1003 "cg\t%3,%1,%2\n\\ | |
1004 shlqbyi\t%3,%3,4\n\\ | |
1005 cgx\t%3,%1,%2\n\\ | |
1006 shlqbyi\t%3,%3,4\n\\ | |
1007 cgx\t%3,%1,%2\n\\ | |
1008 shlqbyi\t%0,%3,4\n\\ | |
1009 addx\t%0,%1,%2" | |
1010 [(set_attr "type" "multi0") | |
1011 (set_attr "length" "28")]) | |
1012 | |
;; Register-register add for the VSF modes, emitted as "fa" (type fp6).
1013 (define_insn "add<mode>3" | |
1014 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1015 (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1016 (match_operand:VSF 2 "spu_reg_operand" "r")))] | |
1017 "" | |
1018 "fa\t%0,%1,%2" | |
1019 [(set_attr "type" "fp6")]) | |
1020 | |
;; Register-register add for the VDF modes, emitted as "dfa" (type fpd).
1021 (define_insn "add<mode>3" | |
1022 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1023 (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1024 (match_operand:VDF 2 "spu_reg_operand" "r")))] | |
1025 "" | |
1026 "dfa\t%0,%1,%2" | |
1027 [(set_attr "type" "fpd")]) | |
1028 | |
1029 | |
1030 ;; sub | |
1031 | |
;; Expand a V16QI subtract using V8HI operations, mirroring addv16qi3.
;; Operands 0-2 are viewed as V8HI through subregs.  Operand 2 is ANDed with
;; 0xff00, two V8HI subtracts are emitted (operand 1 minus the masked value,
;; and operand 1 minus the unmasked operand 2), and selb merges the two
;; differences under a 0x00ff mask.
;; NOTE(review): presumably selb takes the low byte of each halfword from
;; lo_char and the high byte from hi_char -- confirm against the selb pattern.
1032 (define_expand "subv16qi3" | |
1033 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
1034 (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r") | |
1035 (match_operand:V16QI 2 "spu_reg_operand" "r")))] | |
1036 "" | |
1037 "{ | |
1038 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0); | |
1039 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0); | |
1040 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0); | |
1041 rtx rhs_and = gen_reg_rtx (V8HImode); | |
1042 rtx hi_char = gen_reg_rtx (V8HImode); | |
1043 rtx lo_char = gen_reg_rtx (V8HImode); | |
1044 rtx mask = gen_reg_rtx (V8HImode); | |
1045 | |
1046 emit_move_insn (mask, spu_const (V8HImode, 0x00ff)); | |
1047 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00))); | |
1048 emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and)); | |
1049 emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short)); | |
1050 emit_insn (gen_selb (res_short, hi_char, lo_char, mask)); | |
1051 DONE; | |
1052 }") | |
1053 | |
;; Subtract for the VHSI modes (operand 1 minus operand 2).  In alternative
;; 1 the minuend (operand 1) satisfies constraint B and the "i"-suffixed
;; mnemonic is used.  The template lists the operands swapped (%2 before %1).
;; NOTE(review): presumably "sf" subtracts its first source from its
;; second, which is why the operands are swapped -- confirm in the SPU ISA.
1054 (define_insn "sub<mode>3" | |
1055 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
1056 (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B") | |
1057 (match_operand:VHSI 2 "spu_reg_operand" "r,r")))] | |
1058 "" | |
1059 "@ | |
1060 sf<bh>\t%0,%2,%1 | |
1061 sf<bh>i\t%0,%2,%1") | |
1062 | |
;; Expand a subtract for the VDI modes into three unspec sets:
;;   temp 3 = UNSPEC_BG (operand 1, operand 2)
;;   temp 5 = UNSPEC_SHUFB (temp 3, temp 3, TImode control word in temp 4)
;;   operand 0 = UNSPEC_SFX (operand 1, operand 2, temp 5)
;; The preparation code allocates the three temporaries and loads the
;; 16-byte shuffle pattern into temp 4.  The pattern differs from the one
;; in the VDI add expander only in using 0xc0 where that one uses 0x80.
;; NOTE(review): presumably 0xc0 makes shufb produce a different filler
;; byte than 0x80, as needed for borrow propagation -- confirm against the
;; shufb control-byte encoding.
1063 (define_expand "sub<mode>3" | |
1064 [(set (match_dup:VDI 3) | |
1065 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "") | |
1066 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG)) | |
1067 (set (match_dup:VDI 5) | |
1068 (unspec:VDI [(match_dup 3) | |
1069 (match_dup 3) | |
1070 (match_dup:TI 4)] UNSPEC_SHUFB)) | |
1071 (set (match_operand:VDI 0 "spu_reg_operand" "") | |
1072 (unspec:VDI [(match_dup 1) | |
1073 (match_dup 2) | |
1074 (match_dup 5)] UNSPEC_SFX))] | |
1075 "" | |
1076 { | |
1077 unsigned char pat[16] = { | |
1078 0x04, 0x05, 0x06, 0x07, | |
1079 0xc0, 0xc0, 0xc0, 0xc0, | |
1080 0x0c, 0x0d, 0x0e, 0x0f, | |
1081 0xc0, 0xc0, 0xc0, 0xc0 | |
1082 }; | |
1083 operands[3] = gen_reg_rtx (<MODE>mode); | |
1084 operands[4] = gen_reg_rtx (TImode); | |
1085 operands[5] = gen_reg_rtx (<MODE>mode); | |
1086 emit_move_insn (operands[4], array_to_constant (TImode, pat)); | |
1087 }) | |
1088 | |
;; "bg" instruction for the CBOP modes: operand 0 is set to UNSPEC_BG of
;; operands 1 and 2.  The template lists the sources swapped (%2 before %1).
;; NOTE(review): the inputs are modeless and the condition is the C
;; expression `operands'; presumably deliberate -- confirm.
1089 (define_insn "bg_<mode>" | |
1090 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
1091 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
1092 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))] | |
1093 "operands" | |
1094 "bg\t%0,%2,%1") | |
1095 | |
;; "bgx" instruction for the CBOP modes: operand 0 is set to UNSPEC_BGX of
;; operands 1, 2 and 3.  Operand 3 is tied to operand 0 (constraint "0");
;; the template lists the sources swapped (%2 before %1).
;; NOTE(review): the inputs are modeless and the condition is the C
;; expression `operands'; presumably deliberate -- confirm.
1096 (define_insn "bgx_<mode>" | |
1097 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
1098 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
1099 (match_operand 2 "spu_reg_operand" "r") | |
1100 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))] | |
1101 "operands" | |
1102 "bgx\t%0,%2,%1") | |
1103 | |
;; "sfx" instruction for the CBOP modes: operand 0 is set to UNSPEC_SFX of
;; operands 1, 2 and 3.  Operand 3 is tied to operand 0 (constraint "0");
;; the template lists the sources swapped (%2 before %1).
;; NOTE(review): the inputs are modeless and the condition is the C
;; expression `operands'; presumably deliberate -- confirm.
1104 (define_insn "sfx_<mode>" | |
1105 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") | |
1106 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") | |
1107 (match_operand 2 "spu_reg_operand" "r") | |
1108 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))] | |
1109 "operands" | |
1110 "sfx\t%0,%2,%1") | |
1111 | |
;; TImode subtract (operand 1 minus operand 2), emitted as a fixed sequence
;; of fourteen instructions (length 56) using four early-clobber TImode
;; scratch registers (operands 3-6).  Operand 0 is written only by the
;; final instruction.
1112 (define_insn "subti3" | |
1113 [(set (match_operand:TI 0 "spu_reg_operand" "=r") | |
1114 (minus:TI (match_operand:TI 1 "spu_reg_operand" "r") | |
1115 (match_operand:TI 2 "spu_reg_operand" "r"))) | |
1116 (clobber (match_scratch:TI 3 "=&r")) | |
1117 (clobber (match_scratch:TI 4 "=&r")) | |
1118 (clobber (match_scratch:TI 5 "=&r")) | |
1119 (clobber (match_scratch:TI 6 "=&r"))] | |
1120 "" | |
1121 "il\t%6,1\n\\ | |
1122 bg\t%3,%2,%1\n\\ | |
1123 xor\t%3,%3,%6\n\\ | |
1124 sf\t%4,%2,%1\n\\ | |
1125 shlqbyi\t%5,%3,4\n\\ | |
1126 bg\t%3,%5,%4\n\\ | |
1127 xor\t%3,%3,%6\n\\ | |
1128 sf\t%4,%5,%4\n\\ | |
1129 shlqbyi\t%5,%3,4\n\\ | |
1130 bg\t%3,%5,%4\n\\ | |
1131 xor\t%3,%3,%6\n\\ | |
1132 sf\t%4,%5,%4\n\\ | |
1133 shlqbyi\t%5,%3,4\n\\ | |
1134 sf\t%0,%5,%4" | |
1135 [(set_attr "type" "multi0") | |
1136 (set_attr "length" "56")]) | |
1137 | |
;; Register-register subtract for the VSF modes, emitted as "fs" (type fp6).
1138 (define_insn "sub<mode>3" | |
1139 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1140 (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1141 (match_operand:VSF 2 "spu_reg_operand" "r")))] | |
1142 "" | |
1143 "fs\t%0,%1,%2" | |
1144 [(set_attr "type" "fp6")]) | |
1145 | |
;; Register-register subtract for the VDF modes, emitted as "dfs" (type fpd).
1146 (define_insn "sub<mode>3" | |
1147 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1148 (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1149 (match_operand:VDF 2 "spu_reg_operand" "r")))] | |
1150 "" | |
1151 "dfs\t%0,%1,%2" | |
1152 [(set_attr "type" "fpd")]) | |
1153 | |
1154 | |
1155 ;; neg | |
1156 | |
;; Expand a V16QI negate as a subtract from zero: a fresh register is
;; loaded with the V16QI zero constant and subv16qi3 computes
;; zero minus operand 1 into operand 0.
1157 (define_expand "negv16qi2" | |
1158 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
1159 (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))] | |
1160 "" | |
1161 "{ | |
1162 rtx zero = gen_reg_rtx (V16QImode); | |
1163 emit_move_insn (zero, CONST0_RTX (V16QImode)); | |
1164 emit_insn (gen_subv16qi3 (operands[0], zero, operands[1])); | |
1165 DONE; | |
1166 }") | |
1167 | |
;; Negate for the VHSI modes, emitted as the immediate form of "sf<bh>"
;; with an immediate of 0 and operand 1 as the register source.
1168 (define_insn "neg<mode>2" | |
1169 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r") | |
1170 (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))] | |
1171 "" | |
1172 "sf<bh>i\t%0,%1,0") | |
1173 | |
;; Expand a DImode negate as a subtract from zero: a fresh DImode register
;; is loaded with 0 and subdi3 computes zero minus operand 1 into operand 0.
1174 (define_expand "negdi2" | |
1175 [(set (match_operand:DI 0 "spu_reg_operand" "") | |
1176 (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))] | |
1177 "" | |
1178 { | |
1179 rtx zero = gen_reg_rtx(DImode); | |
1180 emit_move_insn(zero, GEN_INT(0)); | |
1181 emit_insn (gen_subdi3(operands[0], zero, operands[1])); | |
1182 DONE; | |
1183 }) | |
1184 | |
;; Expand a TImode negate as a subtract from zero: a fresh TImode register
;; is loaded with 0 and subti3 computes zero minus operand 1 into operand 0.
1185 (define_expand "negti2" | |
1186 [(set (match_operand:TI 0 "spu_reg_operand" "") | |
1187 (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))] | |
1188 "" | |
1189 { | |
1190 rtx zero = gen_reg_rtx(TImode); | |
1191 emit_move_insn(zero, GEN_INT(0)); | |
1192 emit_insn (gen_subti3(operands[0], zero, operands[1])); | |
1193 DONE; | |
1194 }) | |
1195 | |
;; Expand a VSF negate as a parallel of the neg set and a (use) of operand 2,
;; the shape matched by _neg<mode>2.  Operand 2 is a fresh <F2I>mode
;; register loaded with spu_const (<F2I>mode, -0x80000000ull).
;; NOTE(review): presumably that constant is the per-element sign-bit
;; mask -- confirm against spu_const.
1196 (define_expand "neg<mode>2" | |
1197 [(parallel | |
1198 [(set (match_operand:VSF 0 "spu_reg_operand" "") | |
1199 (neg:VSF (match_operand:VSF 1 "spu_reg_operand" ""))) | |
1200 (use (match_dup 2))])] | |
1201 "" | |
1202 "operands[2] = gen_reg_rtx (<F2I>mode); | |
1203 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));") | |
1204 | |
;; Expand a VDF negate as a parallel of the neg set and a (use) of operand 2,
;; the shape matched by _neg<mode>2.  Operand 2 is a fresh <F2I>mode
;; register loaded with spu_const (<F2I>mode, -0x8000000000000000ull).
;; NOTE(review): presumably that constant is the per-element sign-bit
;; mask -- confirm against spu_const.
1205 (define_expand "neg<mode>2" | |
1206 [(parallel | |
1207 [(set (match_operand:VDF 0 "spu_reg_operand" "") | |
1208 (neg:VDF (match_operand:VDF 1 "spu_reg_operand" ""))) | |
1209 (use (match_dup 2))])] | |
1210 "" | |
1211 "operands[2] = gen_reg_rtx (<F2I>mode); | |
1212 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));") | |
1213 | |
;; Floating-point negate for the VSDF modes with the mask supplied in
;; operand 2 (an <F2I>mode register).  The insn has no direct assembly
;; ("#") and an empty split condition; it is split into an <F2I>mode XOR
;; of operand 1 with operand 2, written to operand 0, with both float
;; operands re-expressed in <F2I>mode through spu_gen_subreg.
1214 (define_insn_and_split "_neg<mode>2" | |
1215 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") | |
1216 (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) | |
1217 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))] | |
1218 "" | |
1219 "#" | |
1220 "" | |
1221 [(set (match_dup:<F2I> 3) | |
1222 (xor:<F2I> (match_dup:<F2I> 4) | |
1223 (match_dup:<F2I> 2)))] | |
1224 { | |
1225 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]); | |
1226 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]); | |
1227 }) | |
1228 | |
1229 | |
1230 ;; abs | |
1231 | |
;; Expand a VSF absolute value as a parallel of the abs set and a (use) of
;; operand 2, the shape matched by _abs<mode>2.  Operand 2 is a fresh
;; <F2I>mode register loaded with spu_const (<F2I>mode, 0x7fffffffull).
;; NOTE(review): presumably that constant clears the sign bit of each
;; element when ANDed -- confirm against spu_const.
1232 (define_expand "abs<mode>2" | |
1233 [(parallel | |
1234 [(set (match_operand:VSF 0 "spu_reg_operand" "") | |
1235 (abs:VSF (match_operand:VSF 1 "spu_reg_operand" ""))) | |
1236 (use (match_dup 2))])] | |
1237 "" | |
1238 "operands[2] = gen_reg_rtx (<F2I>mode); | |
1239 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));") | |
1240 | |
;; Expand a VDF absolute value as a parallel of the abs set and a (use) of
;; operand 2, the shape matched by _abs<mode>2.  Operand 2 is a fresh
;; <F2I>mode register loaded with
;; spu_const (<F2I>mode, 0x7fffffffffffffffull).
;; NOTE(review): presumably that constant clears the sign bit of each
;; element when ANDed -- confirm against spu_const.
1241 (define_expand "abs<mode>2" | |
1242 [(parallel | |
1243 [(set (match_operand:VDF 0 "spu_reg_operand" "") | |
1244 (abs:VDF (match_operand:VDF 1 "spu_reg_operand" ""))) | |
1245 (use (match_dup 2))])] | |
1246 "" | |
1247 "operands[2] = gen_reg_rtx (<F2I>mode); | |
1248 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));") | |
1249 | |
;; Floating-point absolute value for the VSDF modes with the mask supplied
;; in operand 2 (an <F2I>mode register).  The insn has no direct assembly
;; ("#") and an empty split condition; it is split into an <F2I>mode AND
;; of operand 1 with operand 2, written to operand 0, with both float
;; operands re-expressed in <F2I>mode through spu_gen_subreg.
1250 (define_insn_and_split "_abs<mode>2" | |
1251 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") | |
1252 (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) | |
1253 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))] | |
1254 "" | |
1255 "#" | |
1256 "" | |
1257 [(set (match_dup:<F2I> 3) | |
1258 (and:<F2I> (match_dup:<F2I> 4) | |
1259 (match_dup:<F2I> 2)))] | |
1260 { | |
1261 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]); | |
1262 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]); | |
1263 }) | |
1264 | |
1265 | |
1266 ;; mul | |
1267 | |
;; HImode multiply.  Alternative 0 multiplies two registers with "mpy";
;; alternative 1 accepts an operand 2 satisfying constraint B and uses
;; "mpyi".  Both are type fp7.
1268 (define_insn "mulhi3" | |
1269 [(set (match_operand:HI 0 "spu_reg_operand" "=r,r") | |
1270 (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r") | |
1271 (match_operand:HI 2 "spu_arith_operand" "r,B")))] | |
1272 "" | |
1273 "@ | |
1274 mpy\t%0,%1,%2 | |
1275 mpyi\t%0,%1,%2" | |
1276 [(set_attr "type" "fp7")]) | |
1277 | |
;; Expand a V8HI multiply using V4SI temporaries.  gen_spu_mpyhh and
;; gen_spu_mpy each produce a V4SI product of operands 1 and 2; the mpyhh
;; result is shifted left by 16 and selb merges it with the mpy result
;; under a 0x0000ffff mask into operand 0 viewed as V4SI.
;; NOTE(review): presumably mpyhh multiplies the upper halfwords and mpy
;; the lower halfwords of each word -- confirm against the spu_mpyhh and
;; spu_mpy patterns.
1278 (define_expand "mulv8hi3" | |
1279 [(set (match_operand:V8HI 0 "spu_reg_operand" "") | |
1280 (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "") | |
1281 (match_operand:V8HI 2 "spu_reg_operand" "")))] | |
1282 "" | |
1283 "{ | |
1284 rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); | |
1285 rtx low = gen_reg_rtx (V4SImode); | |
1286 rtx high = gen_reg_rtx (V4SImode); | |
1287 rtx shift = gen_reg_rtx (V4SImode); | |
1288 rtx mask = gen_reg_rtx (V4SImode); | |
1289 | |
1290 emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff)); | |
1291 emit_insn (gen_spu_mpyhh (high, operands[1], operands[2])); | |
1292 emit_insn (gen_spu_mpy (low, operands[1], operands[2])); | |
1293 emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16))); | |
1294 emit_insn (gen_selb (result, shift, low, mask)); | |
1295 DONE; | |
1296 }") | |
1297 | |
;; Expand a multiply for the VSI modes as a parallel of the mult set and
;; four clobbers.  The preparation code allocates four fresh registers of
;; the operand mode for the clobbered operands 3-6; this is the shape
;; matched by _mulsi3 and _mulv4si3.
1298 (define_expand "mul<mode>3" | |
1299 [(parallel | |
1300 [(set (match_operand:VSI 0 "spu_reg_operand" "") | |
1301 (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "") | |
1302 (match_operand:VSI 2 "spu_reg_operand" ""))) | |
1303 (clobber (match_dup:VSI 3)) | |
1304 (clobber (match_dup:VSI 4)) | |
1305 (clobber (match_dup:VSI 5)) | |
1306 (clobber (match_dup:VSI 6))])] | |
1307 "" | |
1308 { | |
1309 operands[3] = gen_reg_rtx(<MODE>mode); | |
1310 operands[4] = gen_reg_rtx(<MODE>mode); | |
1311 operands[5] = gen_reg_rtx(<MODE>mode); | |
1312 operands[6] = gen_reg_rtx(<MODE>mode); | |
1313 }) | |
1314 | |
;; SImode multiply with four early-clobber temporaries (operands 3-6).
;; The insn has no direct assembly ("#") and an empty split condition; the
;; split code emits one of three sequences and ends with DONE:
;;  - If operand 2 is a CONST_INT, its value is saved in val and the
;;    constant is first moved into temporary 6, which replaces operand 2.
;;  - val nonzero with its low 16 bits all zero: a single mpyh_si with
;;    the constant as the first source.
;;  - 0 < val < 0x10000: one mpyh_si and one mpyu_si (using the immediate
;;    itself when it satisfies constraint K, otherwise temporary 6),
;;    summed with addsi3.
;;  - otherwise (including a register operand 2, where val stays 0): two
;;    mpyh_si with the sources swapped, one mpyu_si, and two addsi3.
1315 (define_insn_and_split "_mulsi3" | |
1316 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1317 (mult:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1318 (match_operand:SI 2 "spu_arith_operand" "rK"))) | |
1319 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r")) | |
1320 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r")) | |
1321 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r")) | |
1322 (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))] | |
1323 "" | |
1324 "#" | |
1325 "" | |
1326 [(set (match_dup:SI 0) | |
1327 (mult:SI (match_dup:SI 1) | |
1328 (match_dup:SI 2)))] | |
1329 { | |
1330 HOST_WIDE_INT val = 0; | |
1331 rtx a = operands[3]; | |
1332 rtx b = operands[4]; | |
1333 rtx c = operands[5]; | |
1334 rtx d = operands[6]; | |
1335 if (GET_CODE(operands[2]) == CONST_INT) | |
1336 { | |
1337 val = INTVAL(operands[2]); | |
1338 emit_move_insn(d, operands[2]); | |
1339 operands[2] = d; | |
1340 } | |
1341 if (val && (val & 0xffff) == 0) | |
1342 { | |
1343 emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1])); | |
1344 } | |
1345 else if (val > 0 && val < 0x10000) | |
1346 { | |
1347 rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d; | |
1348 emit_insn (gen_mpyh_si(a, operands[1], operands[2])); | |
1349 emit_insn (gen_mpyu_si(c, operands[1], cst)); | |
1350 emit_insn (gen_addsi3(operands[0], a, c)); | |
1351 } | |
1352 else | |
1353 { | |
1354 emit_insn (gen_mpyh_si(a, operands[1], operands[2])); | |
1355 emit_insn (gen_mpyh_si(b, operands[2], operands[1])); | |
1356 emit_insn (gen_mpyu_si(c, operands[1], operands[2])); | |
1357 emit_insn (gen_addsi3(d, a, b)); | |
1358 emit_insn (gen_addsi3(operands[0], d, c)); | |
1359 } | |
1360 DONE; | |
1361 }) | |
1362 | |
;; V4SI multiply with four early-clobber temporaries (operands 3-6).
;; The insn has no direct assembly ("#") and an empty split condition.
;; The split code views operands 1 and 2 as V8HI through subregs and emits
;; two spu_mpyh (with the sources swapped), one spu_mpyu and two addv4si3,
;; the same shape as the general case of _mulsi3, then ends with DONE.
1363 (define_insn_and_split "_mulv4si3" | |
1364 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
1365 (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r") | |
1366 (match_operand:V4SI 2 "spu_reg_operand" "r"))) | |
1367 (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r")) | |
1368 (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r")) | |
1369 (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r")) | |
1370 (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))] | |
1371 "" | |
1372 "#" | |
1373 "" | |
1374 [(set (match_dup:V4SI 0) | |
1375 (mult:V4SI (match_dup:V4SI 1) | |
1376 (match_dup:V4SI 2)))] | |
1377 { | |
1378 rtx a = operands[3]; | |
1379 rtx b = operands[4]; | |
1380 rtx c = operands[5]; | |
1381 rtx d = operands[6]; | |
1382 rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0); | |
1383 rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0); | |
1384 emit_insn (gen_spu_mpyh(a, op1, op2)); | |
1385 emit_insn (gen_spu_mpyh(b, op2, op1)); | |
1386 emit_insn (gen_spu_mpyu(c, op1, op2)); | |
1387 emit_insn (gen_addv4si3(d, a, b)); | |
1388 emit_insn (gen_addv4si3(operands[0], d, c)); | |
1389 DONE; | |
1390 }) | |
1391 | |
;; Widening signed multiply: the SImode product of two sign-extended HImode
;; registers, emitted as "mpy" (type fp7).
1392 (define_insn "mulhisi3" | |
1393 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1394 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1395 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))] | |
1396 "" | |
1397 "mpy\t%0,%1,%2" | |
1398 [(set_attr "type" "fp7")]) | |
1399 | |
;; Widening signed multiply of a sign-extended HImode register by an
;; immediate accepted by imm_K_operand / constraint K, emitted as "mpyi"
;; (type fp7).
1400 (define_insn "mulhisi3_imm" | |
1401 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1402 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1403 (match_operand:SI 2 "imm_K_operand" "K")))] | |
1404 "" | |
1405 "mpyi\t%0,%1,%2" | |
1406 [(set_attr "type" "fp7")]) | |
1407 | |
;; Widening unsigned multiply: the SImode product of two zero-extended
;; HImode registers, emitted as "mpyu" (type fp7).
1408 (define_insn "umulhisi3" | |
1409 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1410 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1411 (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))] | |
1412 "" | |
1413 "mpyu\t%0,%1,%2" | |
1414 [(set_attr "type" "fp7")]) | |
1415 | |
;; Widening unsigned multiply of a zero-extended HImode register by an
;; immediate (imm_K_operand / constraint K) masked with 65535, emitted as
;; "mpyui" (type fp7).
1416 (define_insn "umulhisi3_imm" | |
1417 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1418 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1419 (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))] | |
1420 "" | |
1421 "mpyui\t%0,%1,%2" | |
1422 [(set_attr "type" "fp7")]) | |
1423 | |
;; SImode product of the low 16 bits (AND with 65535) of operands 1 and 2.
;; Alternative 0 takes two registers and emits "mpyu"; alternative 1 takes
;; an operand 2 satisfying constraint K and emits "mpyui".  Type fp7.
1424 (define_insn "mpyu_si" | |
1425 [(set (match_operand:SI 0 "spu_reg_operand" "=r,r") | |
1426 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r") | |
1427 (const_int 65535)) | |
1428 (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K") | |
1429 (const_int 65535))))] | |
1430 "" | |
1431 "@ | |
1432 mpyu\t%0,%1,%2 | |
1433 mpyui\t%0,%1,%2" | |
1434 [(set_attr "type" "fp7")]) | |
1435 | |
1436 ;; This isn't always profitable to use. Consider r = a * b + c * d. | |
1437 ;; It's faster to do the multiplies in parallel and then add them. If we | |
1438 ;; merge a multiply and an add, it prevents the multiplies from happening | |
1439 ;; in parallel. | |
;; Multiply-add: the product of two sign-extended HImode registers plus an
;; SImode register, emitted as "mpya" (type fp7).  The insn condition is
;; the constant "0", so this pattern is currently disabled.
1440 (define_insn "mpya_si" | |
1441 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1442 (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1443 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))) | |
1444 (match_operand:SI 3 "spu_reg_operand" "r")))] | |
1445 "0" | |
1446 "mpya\t%0,%1,%2,%3" | |
1447 [(set_attr "type" "fp7")]) | |
1448 | |
;; SImode product of operand 1 with its low 16 bits cleared (AND with
;; -65536) and the low 16 bits of operand 2 (AND with 65535), emitted as
;; "mpyh" (type fp7).
1449 (define_insn "mpyh_si" | |
1450 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1451 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1452 (const_int -65536)) | |
1453 (and:SI (match_operand:SI 2 "spu_reg_operand" "r") | |
1454 (const_int 65535))))] | |
1455 "" | |
1456 "mpyh\t%0,%1,%2" | |
1457 [(set_attr "type" "fp7")]) | |
1458 | |
;; SImode product of two sign-extended HImode registers, arithmetically
;; shifted right by 16, emitted as "mpys" (type fp7).
1459 (define_insn "mpys_si" | |
1460 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1461 (ashiftrt:SI | |
1462 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) | |
1463 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))) | |
1464 (const_int 16)))] | |
1465 "" | |
1466 "mpys\t%0,%1,%2" | |
1467 [(set_attr "type" "fp7")]) | |
1468 | |
;; SImode product of operands 1 and 2 after each is arithmetically shifted
;; right by 16 (the signed upper halfwords), emitted as "mpyhh" (type fp7).
1469 (define_insn "mpyhh_si" | |
1470 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1471 (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1472 (const_int 16)) | |
1473 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") | |
1474 (const_int 16))))] | |
1475 "" | |
1476 "mpyhh\t%0,%1,%2" | |
1477 [(set_attr "type" "fp7")]) | |
1478 | |
;; SImode product of operands 1 and 2 after each is logically shifted
;; right by 16 (the unsigned upper halfwords), emitted as "mpyhhu"
;; (type fp7).
1479 (define_insn "mpyhhu_si" | |
1480 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1481 (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1482 (const_int 16)) | |
1483 (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") | |
1484 (const_int 16))))] | |
1485 "" | |
1486 "mpyhhu\t%0,%1,%2" | |
1487 [(set_attr "type" "fp7")]) | |
1488 | |
;; Multiply-accumulate of the signed upper halfwords: the mpyhh_si product
;; plus operand 3, which is tied to operand 0 (constraint "0") and so is
;; absent from the template.  The insn condition is the constant "0", so
;; this pattern is currently disabled.
1489 (define_insn "mpyhha_si" | |
1490 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
1491 (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1492 (const_int 16)) | |
1493 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") | |
1494 (const_int 16))) | |
1495 (match_operand:SI 3 "spu_reg_operand" "0")))] | |
1496 "0" | |
1497 "mpyhha\t%0,%1,%2" | |
1498 [(set_attr "type" "fp7")]) | |
1499 | |
;; Floating-point multiply for the VSDF modes.  The mnemonic is "fm" with
;; the <d> attribute prefixed, and the type is "fp" with <d6> appended,
;; both substituted per mode.
1500 (define_insn "mul<mode>3" | |
1501 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") | |
1502 (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r") | |
1503 (match_operand:VSDF 2 "spu_reg_operand" "r")))] | |
1504 "" | |
1505 "<d>fm\t%0,%1,%2" | |
1506 [(set_attr "type" "fp<d6>")]) | |
1507 | |
;; VSF multiply-add: operand 1 * operand 2 + operand 3, with four distinct
;; register operands, emitted as "fma" (type fp6).
1508 (define_insn "fma_<mode>" | |
1509 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1510 (plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1511 (match_operand:VSF 2 "spu_reg_operand" "r")) | |
1512 (match_operand:VSF 3 "spu_reg_operand" "r")))] | |
1513 "" | |
1514 "fma\t%0,%1,%2,%3" | |
1515 [(set_attr "type" "fp6")]) | |
1516 | |
;; VSF negative multiply-subtract: operand 3 - operand 1 * operand 2,
;; emitted as "fnms" (type fp6).
1517 (define_insn "fnms_<mode>" | |
1518 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1519 (minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r") | |
1520 (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1521 (match_operand:VSF 2 "spu_reg_operand" "r"))))] | |
1522 "" | |
1523 "fnms\t%0,%1,%2,%3" | |
1524 [(set_attr "type" "fp6")]) | |
1525 | |
;; VSF multiply-subtract: operand 1 * operand 2 - operand 3, emitted as
;; "fms" (type fp6).
1526 (define_insn "fms_<mode>" | |
1527 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1528 (minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1529 (match_operand:VSF 2 "spu_reg_operand" "r")) | |
1530 (match_operand:VSF 3 "spu_reg_operand" "r")))] | |
1531 "" | |
1532 "fms\t%0,%1,%2,%3" | |
1533 [(set_attr "type" "fp6")]) | |
1534 | |
;; VDF multiply-add: operand 1 * operand 2 + operand 3, emitted as "dfma"
;; (type fpd).  Unlike the VSF form, the addend (operand 3) is tied to the
;; destination (constraint "0") and the template names only three registers.
1535 (define_insn "fma_<mode>" | |
1536 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1537 (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1538 (match_operand:VDF 2 "spu_reg_operand" "r")) | |
1539 (match_operand:VDF 3 "spu_reg_operand" "0")))] | |
1540 "" | |
1541 "dfma\t%0,%1,%2" | |
1542 [(set_attr "type" "fpd")]) | |
1543 | |
;; VDF negated multiply-add: -(operand 1 * operand 2 + operand 3), emitted
;; as "dfnma" (type fpd).  Operand 3 is tied to the destination
;; (constraint "0").
1544 (define_insn "fnma_<mode>" | |
1545 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1546 (neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1547 (match_operand:VDF 2 "spu_reg_operand" "r")) | |
1548 (match_operand:VDF 3 "spu_reg_operand" "0"))))] | |
1549 "" | |
1550 "dfnma\t%0,%1,%2" | |
1551 [(set_attr "type" "fpd")]) | |
1552 | |
;; VDF negative multiply-subtract: operand 3 - operand 1 * operand 2,
;; emitted as "dfnms" (type fpd).  Operand 3 is tied to the destination
;; (constraint "0").
1553 (define_insn "fnms_<mode>" | |
1554 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1555 (minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0") | |
1556 (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1557 (match_operand:VDF 2 "spu_reg_operand" "r"))))] | |
1558 "" | |
1559 "dfnms\t%0,%1,%2" | |
1560 [(set_attr "type" "fpd")]) | |
1561 | |
;; VDF multiply-subtract: operand 1 * operand 2 - operand 3, emitted as
;; "dfms" (type fpd).  Operand 3 is tied to the destination
;; (constraint "0").
1562 (define_insn "fms_<mode>" | |
1563 [(set (match_operand:VDF 0 "spu_reg_operand" "=r") | |
1564 (minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") | |
1565 (match_operand:VDF 2 "spu_reg_operand" "r")) | |
1566 (match_operand:VDF 3 "spu_reg_operand" "0")))] | |
1567 "" | |
1568 "dfms\t%0,%1,%2" | |
1569 [(set_attr "type" "fpd")]) | |
1570 | |
1571 | |
1572 ;; mul highpart, used for divide by constant optimizations. | |
1573 | |
;; Expand the signed high-part multiply: the upper 32 bits of the 64-bit
;; product of two sign-extended SImode registers.  The preparation code
;; emits the whole sequence itself and ends with DONE, so the RTL template
;; above it only documents the operation.  The sequence builds partial
;; products with umulhisi3, mpyh_si, mpyhh_si and mpys_si, generates carries
;; with cg_si, sums with addx_si, and then adds a correction term built
;; from cgt_hi / andc_hi / extendhisi2.
;; NOTE(review): unshare_all_rtl_in_chain is called on the first emitted
;; insn, presumably because the HImode SUBREG rtxes (op1_hi, op2_hi, t0_hi,
;; t1_hi) are each used in several insns -- confirm.
1574 (define_expand "smulsi3_highpart" | |
1575 [(set (match_operand:SI 0 "register_operand" "") | |
1576 (truncate:SI | |
1577 (ashiftrt:DI | |
1578 (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) | |
1579 (sign_extend:DI (match_operand:SI 2 "register_operand" ""))) | |
1580 (const_int 32))))] | |
1581 "" | |
1582 { | |
1583 rtx t0 = gen_reg_rtx (SImode); | |
1584 rtx t1 = gen_reg_rtx (SImode); | |
1585 rtx t2 = gen_reg_rtx (SImode); | |
1586 rtx t3 = gen_reg_rtx (SImode); | |
1587 rtx t4 = gen_reg_rtx (SImode); | |
1588 rtx t5 = gen_reg_rtx (SImode); | |
1589 rtx t6 = gen_reg_rtx (SImode); | |
1590 rtx t7 = gen_reg_rtx (SImode); | |
1591 rtx t8 = gen_reg_rtx (SImode); | |
1592 rtx t9 = gen_reg_rtx (SImode); | |
1593 rtx t11 = gen_reg_rtx (SImode); | |
1594 rtx t12 = gen_reg_rtx (SImode); | |
1595 rtx t14 = gen_reg_rtx (SImode); | |
1596 rtx t15 = gen_reg_rtx (HImode); | |
1597 rtx t16 = gen_reg_rtx (HImode); | |
1598 rtx t17 = gen_reg_rtx (HImode); | |
1599 rtx t18 = gen_reg_rtx (HImode); | |
1600 rtx t19 = gen_reg_rtx (SImode); | |
1601 rtx t20 = gen_reg_rtx (SImode); | |
1602 rtx t21 = gen_reg_rtx (SImode); | |
1603 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2); | |
1604 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2); | |
1605 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2); | |
1606 rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2); | |
1607 | |
1608 rtx insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16))); | |
1609 emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16))); | |
1610 emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi)); | |
1611 emit_insn (gen_mpyh_si (t3, operands[1], operands[2])); | |
1612 emit_insn (gen_mpyh_si (t4, operands[2], operands[1])); | |
1613 emit_insn (gen_mpyhh_si (t5, operands[1], operands[2])); | |
1614 emit_insn (gen_mpys_si (t6, t0_hi, op2_hi)); | |
1615 emit_insn (gen_mpys_si (t7, t1_hi, op1_hi)); | |
1616 | |
1617 /* Gen carry bits (in t9 and t11). */ | |
1618 emit_insn (gen_addsi3 (t8, t2, t3)); | |
1619 emit_insn (gen_cg_si (t9, t2, t3)); | |
1620 emit_insn (gen_cg_si (t11, t8, t4)); | |
1621 | |
1622 /* Gen high 32 bits in operand[0]. Correct for mpys. */ | |
1623 emit_insn (gen_addx_si (t12, t5, t6, t9)); | |
1624 emit_insn (gen_addx_si (t14, t12, t7, t11)); | |
1625 | |
1626 /* mpys treats both operands as signed when we really want it to treat | |
1627 the first operand as signed and the second operand as unsigned. | |
1628 The code below corrects for that difference. */ | |
1629 emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1))); | |
1630 emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1))); | |
1631 emit_insn (gen_andc_hi (t17, t1_hi, t15)); | |
1632 emit_insn (gen_andc_hi (t18, t0_hi, t16)); | |
1633 emit_insn (gen_extendhisi2 (t19, t17)); | |
1634 emit_insn (gen_extendhisi2 (t20, t18)); | |
1635 emit_insn (gen_addsi3 (t21, t19, t20)); | |
1636 emit_insn (gen_addsi3 (operands[0], t14, t21)); | |
1637 unshare_all_rtl_in_chain (insn); | |
1638 DONE; | |
1639 }) | |
1640 | |
;; Expand the unsigned high-part multiply of two SImode registers.  The
;; preparation code emits the whole sequence itself and ends with DONE, so
;; the RTL template above it only documents the operation.  Operand 2 is
;; rotated by 16 into t0; partial products are built with umulhisi3 and
;; mpyhhu_si, shifted by 16 in both directions, carries are generated with
;; cg_si, and the high word is summed with addx_si and moved to operand 0.
;; NOTE(review): the template uses ashiftrt:DI on the zero-extended product
;; where lshiftrt might be expected; it is never emitted because of DONE,
;; but confirm nothing else inspects it.
;; NOTE(review): unshare_all_rtl_in_chain is called on the first emitted
;; insn, presumably because the HImode SUBREG rtxes are each used in
;; several insns -- confirm.
1641 (define_expand "umulsi3_highpart" | |
1642 [(set (match_operand:SI 0 "register_operand" "") | |
1643 (truncate:SI | |
1644 (ashiftrt:DI | |
1645 (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) | |
1646 (zero_extend:DI (match_operand:SI 2 "register_operand" ""))) | |
1647 (const_int 32))))] | |
1648 "" | |
1649 | |
1650 { | |
1651 rtx t0 = gen_reg_rtx (SImode); | |
1652 rtx t1 = gen_reg_rtx (SImode); | |
1653 rtx t2 = gen_reg_rtx (SImode); | |
1654 rtx t3 = gen_reg_rtx (SImode); | |
1655 rtx t4 = gen_reg_rtx (SImode); | |
1656 rtx t5 = gen_reg_rtx (SImode); | |
1657 rtx t6 = gen_reg_rtx (SImode); | |
1658 rtx t7 = gen_reg_rtx (SImode); | |
1659 rtx t8 = gen_reg_rtx (SImode); | |
1660 rtx t9 = gen_reg_rtx (SImode); | |
1661 rtx t10 = gen_reg_rtx (SImode); | |
1662 rtx t12 = gen_reg_rtx (SImode); | |
1663 rtx t13 = gen_reg_rtx (SImode); | |
1664 rtx t14 = gen_reg_rtx (SImode); | |
1665 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2); | |
1666 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2); | |
1667 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2); | |
1668 | |
1669 rtx insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16))); | |
1670 emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi)); | |
1671 emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi)); | |
1672 emit_insn (gen_mpyhhu_si (t3, operands[1], t0)); | |
1673 emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2])); | |
1674 emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16))); | |
1675 emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16))); | |
1676 emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16))); | |
1677 emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16))); | |
1678 | |
1679 /* Gen carry bits (in t10 and t12). */ | |
1680 emit_insn (gen_addsi3 (t9, t1, t5)); | |
1681 emit_insn (gen_cg_si (t10, t1, t5)); | |
1682 emit_insn (gen_cg_si (t12, t9, t6)); | |
1683 | |
1684 /* Gen high 32 bits in operand[0]. */ | |
1685 emit_insn (gen_addx_si (t13, t4, t7, t10)); | |
1686 emit_insn (gen_addx_si (t14, t13, t8, t12)); | |
1687 emit_insn (gen_movsi (operands[0], t14)); | |
1688 unshare_all_rtl_in_chain (insn); | |
1689 | |
1690 DONE; | |
1691 }) | |
1692 | |
1693 ;; div | |
1694 | |
1695 ;; Not necessarily the best implementation of divide but faster than | |
1696 ;; the default that gcc provides because this is inlined and it uses | |
1697 ;; clz. | |
;; Signed SImode divide and modulus in one insn: operand 0 receives
;; operand 1 / operand 2 and operand 3 receives operand 1 % operand 2.
;; Both results are early-clobber, nine early-clobber SImode scratch
;; registers (operands 4-12) are used, and hard register 130 is clobbered.
;; The template is 32 instructions (length 128) containing a loop between
;; local labels 1 and 3 that repeats while %5 is nonzero.
;; NOTE(review): the leading "heqi %2,0" presumably halts on a zero
;; divisor, and the sfi/cgti/selb groups before and after the loop
;; presumably take absolute values and restore the signs -- confirm
;; against the SPU ISA.
1698 (define_insn "divmodsi4" | |
1699 [(set (match_operand:SI 0 "spu_reg_operand" "=&r") | |
1700 (div:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1701 (match_operand:SI 2 "spu_reg_operand" "r"))) | |
1702 (set (match_operand:SI 3 "spu_reg_operand" "=&r") | |
1703 (mod:SI (match_dup 1) | |
1704 (match_dup 2))) | |
1705 (clobber (match_scratch:SI 4 "=&r")) | |
1706 (clobber (match_scratch:SI 5 "=&r")) | |
1707 (clobber (match_scratch:SI 6 "=&r")) | |
1708 (clobber (match_scratch:SI 7 "=&r")) | |
1709 (clobber (match_scratch:SI 8 "=&r")) | |
1710 (clobber (match_scratch:SI 9 "=&r")) | |
1711 (clobber (match_scratch:SI 10 "=&r")) | |
1712 (clobber (match_scratch:SI 11 "=&r")) | |
1713 (clobber (match_scratch:SI 12 "=&r")) | |
1714 (clobber (reg:SI 130))] | |
1715 "" | |
1716 "heqi %2,0\\n\\ | |
1717 hbrr 3f,1f\\n\\ | |
1718 sfi %8,%1,0\\n\\ | |
1719 sfi %9,%2,0\\n\\ | |
1720 cgti %10,%1,-1\\n\\ | |
1721 cgti %11,%2,-1\\n\\ | |
1722 selb %8,%8,%1,%10\\n\\ | |
1723 selb %9,%9,%2,%11\\n\\ | |
1724 clz %4,%8\\n\\ | |
1725 clz %7,%9\\n\\ | |
1726 il %5,1\\n\\ | |
1727 fsmbi %0,0\\n\\ | |
1728 sf %7,%4,%7\\n\\ | |
1729 shlqbyi %3,%8,0\\n\\ | |
1730 xor %11,%10,%11\\n\\ | |
1731 shl %5,%5,%7\\n\\ | |
1732 shl %4,%9,%7\\n\\ | |
1733 lnop \\n\\ | |
1734 1: or %12,%0,%5\\n\\ | |
1735 rotqmbii %5,%5,-1\\n\\ | |
1736 clgt %6,%4,%3\\n\\ | |
1737 lnop \\n\\ | |
1738 sf %7,%4,%3\\n\\ | |
1739 rotqmbii %4,%4,-1\\n\\ | |
1740 selb %0,%12,%0,%6\\n\\ | |
1741 lnop \\n\\ | |
1742 selb %3,%7,%3,%6\\n\\ | |
1743 3: brnz %5,1b\\n\\ | |
1744 2: sfi %8,%3,0\\n\\ | |
1745 sfi %9,%0,0\\n\\ | |
1746 selb %3,%8,%3,%10\\n\\ | |
1747 selb %0,%0,%9,%11" | |
1748 [(set_attr "type" "multi0") | |
1749 (set_attr "length" "128")]) | |
1750 | |
;; Unsigned SImode divide and modulo in a single insn: operand 0 is set
;; to (udiv op1 op2) and operand 3 to (umod op1 op2).  Both results and
;; the five scratch registers (operands 4-8) are early-clobber, and
;; register number 130 is clobbered as well.
;; The template holds 20 instructions (length 80 bytes; the trailing
;; "2:" is only a label).  The loop between labels 1 and 3 has the same
;; instruction sequence as the one in divmodsi4, using %8 where that
;; pattern uses %12; there is no sign handling before or after it.
;; NOTE(review): the leading "heqi %2,0" presumably halts on a zero
;; divisor -- confirm against the SPU ISA.
1751 (define_insn "udivmodsi4" | |
1752 [(set (match_operand:SI 0 "spu_reg_operand" "=&r") | |
1753 (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r") | |
1754 (match_operand:SI 2 "spu_reg_operand" "r"))) | |
1755 (set (match_operand:SI 3 "spu_reg_operand" "=&r") | |
1756 (umod:SI (match_dup 1) | |
1757 (match_dup 2))) | |
1758 (clobber (match_scratch:SI 4 "=&r")) | |
1759 (clobber (match_scratch:SI 5 "=&r")) | |
1760 (clobber (match_scratch:SI 6 "=&r")) | |
1761 (clobber (match_scratch:SI 7 "=&r")) | |
1762 (clobber (match_scratch:SI 8 "=&r")) | |
1763 (clobber (reg:SI 130))] | |
1764 "" | |
1765 "heqi %2,0\\n\\ | |
1766 hbrr 3f,1f\\n\\ | |
1767 clz %7,%2\\n\\ | |
1768 clz %4,%1\\n\\ | |
1769 il %5,1\\n\\ | |
1770 fsmbi %0,0\\n\\ | |
1771 sf %7,%4,%7\\n\\ | |
1772 ori %3,%1,0\\n\\ | |
1773 shl %5,%5,%7\\n\\ | |
1774 shl %4,%2,%7\\n\\ | |
1775 1: or %8,%0,%5\\n\\ | |
1776 rotqmbii %5,%5,-1\\n\\ | |
1777 clgt %6,%4,%3\\n\\ | |
1778 lnop \\n\\ | |
1779 sf %7,%4,%3\\n\\ | |
1780 rotqmbii %4,%4,-1\\n\\ | |
1781 selb %0,%8,%0,%6\\n\\ | |
1782 lnop \\n\\ | |
1783 selb %3,%7,%3,%6\\n\\ | |
1784 3: brnz %5,1b\\n\\ | |
1785 2:" | |
1786 [(set_attr "type" "multi0") | |
1787 (set_attr "length" "80")]) | |
1788 | |
;; Named expander for floating-point division over the VSF mode iterator.
;; It has no preparation code: it simply emits a parallel holding the
;; div set plus three VSF scratch clobbers, the shape matched by the
;; *div<mode>3_fast and *div<mode>3_adjusted patterns.
1789 (define_expand "div<mode>3" | |
1790 [(parallel | |
1791 [(set (match_operand:VSF 0 "spu_reg_operand" "") | |
1792 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "") | |
1793 (match_operand:VSF 2 "spu_reg_operand" ""))) | |
1794 (clobber (match_scratch:VSF 3 "")) | |
1795 (clobber (match_scratch:VSF 4 "")) | |
1796 (clobber (match_scratch:VSF 5 ""))])] | |
1797 "" | |
1798 "") | |
1799 | |
;; Division variant selected when flag_unsafe_math_optimizations is set.
;; It is emitted as "#" and split after reload into five insns:
;; frest, fi, a multiply, fnms and fma.  Only two of the three scratch
;; clobbers are real operands (3 and 4); the third is a bare scratch
;; that the split code never references.
1800 (define_insn_and_split "*div<mode>3_fast" | |
1801 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1802 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1803 (match_operand:VSF 2 "spu_reg_operand" "r"))) | |
1804 (clobber (match_scratch:VSF 3 "=&r")) | |
1805 (clobber (match_scratch:VSF 4 "=&r")) | |
1806 (clobber (scratch:VSF))] | |
1807 "flag_unsafe_math_optimizations" | |
1808 "#" | |
1809 "reload_completed" | |
1810 [(set (match_dup:VSF 0) | |
1811 (div:VSF (match_dup:VSF 1) | |
1812 (match_dup:VSF 2))) | |
1813 (clobber (match_dup:VSF 3)) | |
1814 (clobber (match_dup:VSF 4)) | |
1815 (clobber (scratch:VSF))] | |
1816 { | |
/* operands[3] is built from the divisor by frest followed by fi;
   going by the instruction names this is a reciprocal estimate of
   operands[2] -- TODO confirm against the SPU ISA.  */
1817 emit_insn (gen_frest_<mode>(operands[3], operands[2])); | |
1818 emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3])); | |
/* operands[4] = operands[1] * operands[3], the first quotient value.  */
1819 emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3])); | |
/* One fnms/fma pair combines the quotient value, divisor and dividend
   into the final result in operands[0]; presumably a single
   refinement step.  */
1820 emit_insn (gen_fnms_<mode>(operands[0], operands[4], operands[2], operands[1])); | |
1821 emit_insn (gen_fma_<mode>(operands[0], operands[0], operands[3], operands[4])); | |
1822 DONE; | |
1823 }) | |
1824 | |
;; Division variant selected when flag_unsafe_math_optimizations is NOT
;; set.  It is emitted as "#" and split after reload.  The first five
;; emitted insns are the same frest/fi/mul/fnms/fma sequence as in
;; *div<mode>3_fast, except that the result goes to scratch operand 3;
;; the remaining code chooses between that value and the value whose
;; integer image is one larger, as described in the comment below.
1825 (define_insn_and_split "*div<mode>3_adjusted" | |
1826 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1827 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r") | |
1828 (match_operand:VSF 2 "spu_reg_operand" "r"))) | |
1829 (clobber (match_scratch:VSF 3 "=&r")) | |
1830 (clobber (match_scratch:VSF 4 "=&r")) | |
1831 (clobber (match_scratch:VSF 5 "=&r"))] | |
1832 "!flag_unsafe_math_optimizations" | |
1833 "#" | |
1834 "reload_completed" | |
1835 [(set (match_dup:VSF 0) | |
1836 (div:VSF (match_dup:VSF 1) | |
1837 (match_dup:VSF 2))) | |
1838 (clobber (match_dup:VSF 3)) | |
1839 (clobber (match_dup:VSF 4)) | |
1840 (clobber (match_dup:VSF 5))] | |
1841 { | |
/* Quotient estimate, left in operands[3].  */
1842 emit_insn (gen_frest_<mode> (operands[3], operands[2])); | |
1843 emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3])); | |
1844 emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3])); | |
1845 emit_insn (gen_fnms_<mode> (operands[5], operands[4], operands[2], operands[1])); | |
1846 emit_insn (gen_fma_<mode> (operands[3], operands[5], operands[3], operands[4])); | |
1847 | |
1848 /* Due to truncation error, the quotient result may be low by 1 ulp. | |
1849 Conditionally add one if the estimate is too small in magnitude. */ | |
1850 | |
/* operands[4] holds the 0x80000000 mask and operands[5] the bit image
   0x3f800000; the selb then merges operands[1] into operands[5] under
   that mask.  Presumably this yields 1.0 carrying the sign of the
   dividend -- TODO confirm the selb operand order.  */
1851 emit_move_insn (gen_lowpart (<F2I>mode, operands[4]), | |
1852 spu_const (<F2I>mode, 0x80000000ULL)); | |
1853 emit_move_insn (gen_lowpart (<F2I>mode, operands[5]), | |
1854 spu_const (<F2I>mode, 0x3f800000ULL)); | |
1855 emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4])); | |
1856 | |
/* operands[4] = integer image of the estimate plus one.  */
1857 emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]), | |
1858 gen_lowpart (<F2I>mode, operands[3]), | |
1859 spu_const (<F2I>mode, 1))); | |
/* Combine the incremented candidate with the divisor and dividend,
   scale by operands[5], and turn the integer compare against -1 into
   the selection mask for the final selb.  */
1860 emit_insn (gen_fnms_<mode> (operands[0], operands[2], operands[4], operands[1])); | |
1861 emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5])); | |
1862 emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]), | |
1863 gen_lowpart (<F2I>mode, operands[0]), | |
1864 spu_const (<F2I>mode, -1))); | |
1865 emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0])); | |
1866 DONE; | |
1867 }) | |
1868 | |
1869 | |
1870 ;; sqrt | |
1871 | |
;; SFmode square root.  Emitted as "#" and split after reload, using
;; four early-clobber SF scratch registers (operands 2-5).  The split
;; loads the constants 0.5 and 1.00000011920928955078125, forms an
;; estimate from the input with frsqest followed by fi, and combines
;; the pieces with two multiplies, one fnms and one fma into operand 0.
1872 (define_insn_and_split "sqrtsf2" | |
1873 [(set (match_operand:SF 0 "spu_reg_operand" "=r") | |
1874 (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r"))) | |
1875 (clobber (match_scratch:SF 2 "=&r")) | |
1876 (clobber (match_scratch:SF 3 "=&r")) | |
1877 (clobber (match_scratch:SF 4 "=&r")) | |
1878 (clobber (match_scratch:SF 5 "=&r"))] | |
1879 "" | |
1880 "#" | |
1881 "reload_completed" | |
1882 [(set (match_dup:SF 0) | |
1883 (sqrt:SF (match_dup:SF 1))) | |
1884 (clobber (match_dup:SF 2)) | |
1885 (clobber (match_dup:SF 3)) | |
1886 (clobber (match_dup:SF 4)) | |
1887 (clobber (match_dup:SF 5))] | |
1888 { | |
/* Constants used by the correction step.  */
1889 emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode)); | |
1890 emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode)); | |
/* operands[2] = estimate derived from the input; going by the
   instruction name this is a reciprocal square root estimate --
   TODO confirm against the SPU ISA.  */
1891 emit_insn (gen_frsqest_sf(operands[2],operands[1])); | |
1892 emit_insn (gen_fi_sf(operands[2],operands[1],operands[2])); | |
/* operands[5] = estimate * input; operands[3] = operands[5] * 0.5.  */
1893 emit_insn (gen_mulsf3(operands[5],operands[2],operands[1])); | |
1894 emit_insn (gen_mulsf3(operands[3],operands[5],operands[3])); | |
/* Correction term in operands[4], then the final fma into operands[0].  */
1895 emit_insn (gen_fnms_sf(operands[4],operands[2],operands[5],operands[4])); | |
1896 emit_insn (gen_fma_sf(operands[0],operands[4],operands[3],operands[5])); | |
1897 DONE; | |
1898 }) | |
1899 | |
;; Single "frest" instruction, modelled as an UNSPEC_FREST of operand 1
;; for each mode of the VSF iterator.  Scheduled as type "shuf".
1900 (define_insn "frest_<mode>" | |
1901 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1902 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))] | |
1903 "" | |
1904 "frest\t%0,%1" | |
1905 [(set_attr "type" "shuf")]) | |
1906 | |
;; Single "frsqest" instruction, modelled as an UNSPEC_FRSQEST of
;; operand 1 for each mode of the VSF iterator.  Scheduled as type "shuf".
1907 (define_insn "frsqest_<mode>" | |
1908 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1909 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))] | |
1910 "" | |
1911 "frsqest\t%0,%1" | |
1912 [(set_attr "type" "shuf")]) | |
1913 | |
;; Single "fi" instruction taking two register inputs, modelled as an
;; UNSPEC_FI.  The division and square root splitters in this file
;; always call it with the original value as operand 1 and the result
;; of frest/frsqest as operand 2.  Scheduled as type "fp7".
1914 (define_insn "fi_<mode>" | |
1915 [(set (match_operand:VSF 0 "spu_reg_operand" "=r") | |
1916 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r") | |
1917 (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))] | |
1918 "" | |
1919 "fi\t%0,%1,%2" | |
1920 [(set_attr "type" "fp7")]) | |
1921 | |
1922 | |
1923 ;; and | |
1924 | |
;; Bitwise AND for each mode of the MOV iterator.  Alternative 0 takes a
;; register second operand; alternative 1 takes a constant accepted by
;; constraint "C" and emits the immediate form, with the mnemonic infix
;; and the printed value produced by the %j / %J operand modifiers.
1925 (define_insn "and<mode>3" | |
1926 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r") | |
1927 (and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r") | |
1928 (match_operand:MOV 2 "spu_logical_operand" "r,C")))] | |
1929 "" | |
1930 "@ | |
1931 and\t%0,%1,%2 | |
1932 and%j2i\t%0,%1,%J2") | |
1933 | |
;; DImode bitwise AND.  Same two-alternative layout as and<mode>3, but
;; the constant alternative uses constraint "c" and the %k / %K operand
;; modifiers.
1934 (define_insn "anddi3" | |
1935 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") | |
1936 (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r") | |
1937 (match_operand:DI 2 "spu_logical_operand" "r,c")))] | |
1938 "" | |
1939 "@ | |
1940 and\t%0,%1,%2 | |
1941 and%k2i\t%0,%1,%K2") | |
1942 | |
;; TImode bitwise AND.  Same two-alternative layout as and<mode>3, but
;; the constant alternative uses constraint "Y" and the %m / %L operand
;; modifiers.
1943 (define_insn "andti3" | |
1944 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
1945 (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
1946 (match_operand:TI 2 "spu_logical_operand" "r,Y")))] | |
1947 "" | |
1948 "@ | |
1949 and\t%0,%1,%2 | |
1950 and%m2i\t%0,%1,%L2") | |
1951 | |
;; AND with complement: operand 0 = operand 1 AND (NOT operand 2), for
;; each mode of the ALL iterator.  Note that operand 2 is the one that
;; appears first in the RTL, inside the not.
1952 (define_insn "andc_<mode>" | |
1953 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
1954 (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r")) | |
1955 (match_operand:ALL 1 "spu_reg_operand" "r")))] | |
1956 "" | |
1957 "andc\t%0,%1,%2") | |
1958 | |
;; NAND: operand 0 = NOT (operand 2 AND operand 1), for each mode of the
;; ALL iterator.
1959 (define_insn "nand_<mode>" | |
1960 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
1961 (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r") | |
1962 (match_operand:ALL 1 "spu_reg_operand" "r"))))] | |
1963 "" | |
1964 "nand\t%0,%1,%2") | |
1965 | |
1966 | |
1967 ;; ior | |
1968 | |
;; Bitwise inclusive OR for each mode of the MOV iterator, with three
;; alternatives: register operand ("or"), a constant accepted by
;; constraint "C" (immediate "or" form via %j / %J), and a constant
;; accepted by constraint "D" emitted as "iohl".  The iohl alternative
;; ties operand 1 to operand 0 (constraint "0") because the instruction
;; is printed with only a destination and an immediate.
1969 (define_insn "ior<mode>3" | |
1970 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r") | |
1971 (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0") | |
1972 (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))] | |
1973 "" | |
1974 "@ | |
1975 or\t%0,%1,%2 | |
1976 or%j2i\t%0,%1,%J2 | |
1977 iohl\t%0,%J2") | |
1978 | |
;; DImode inclusive OR.  Same three-alternative layout as ior<mode>3,
;; using constraints "c" and "d" and the %k / %K operand modifiers for
;; the constant alternatives.
1979 (define_insn "iordi3" | |
1980 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r") | |
1981 (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0") | |
1982 (match_operand:DI 2 "spu_ior_operand" "r,c,d")))] | |
1983 "" | |
1984 "@ | |
1985 or\t%0,%1,%2 | |
1986 or%k2i\t%0,%1,%K2 | |
1987 iohl\t%0,%K2") | |
1988 | |
;; TImode inclusive OR.  Same three-alternative layout as ior<mode>3,
;; using constraints "Y" and "Z" and the %m / %L operand modifiers for
;; the constant alternatives.
1989 (define_insn "iorti3" | |
1990 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r") | |
1991 (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0") | |
1992 (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))] | |
1993 "" | |
1994 "@ | |
1995 or\t%0,%1,%2 | |
1996 or%m2i\t%0,%1,%L2 | |
1997 iohl\t%0,%L2") | |
1998 | |
;; OR with complement: operand 0 = operand 1 OR (NOT operand 2), for
;; each mode of the ALL iterator.
1999 (define_insn "orc_<mode>" | |
2000 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
2001 (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r")) | |
2002 (match_operand:ALL 1 "spu_reg_operand" "r")))] | |
2003 "" | |
2004 "orc\t%0,%1,%2") | |
2005 | |
;; NOR: operand 0 = NOT (operand 1 OR operand 2), for each mode of the
;; ALL iterator.
2006 (define_insn "nor_<mode>" | |
2007 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
2008 (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r") | |
2009 (match_operand:ALL 2 "spu_reg_operand" "r"))))] | |
2010 "" | |
2011 "nor\t%0,%1,%2") | |
2012 | |
2013 ;; xor | |
2014 | |
;; Bitwise exclusive OR for each mode of the MOV iterator.  Alternative 0
;; takes a register; alternative 1 takes a constant accepted by
;; constraint "B" (not "C" as in and<mode>3) and emits the immediate
;; form through the %j / %J operand modifiers.
2015 (define_insn "xor<mode>3" | |
2016 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r") | |
2017 (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r") | |
2018 (match_operand:MOV 2 "spu_logical_operand" "r,B")))] | |
2019 "" | |
2020 "@ | |
2021 xor\t%0,%1,%2 | |
2022 xor%j2i\t%0,%1,%J2") | |
2023 | |
;; DImode exclusive OR: register alternative, or a constant accepted by
;; constraint "c" printed through the %k / %K operand modifiers.
2024 (define_insn "xordi3" | |
2025 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") | |
2026 (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r") | |
2027 (match_operand:DI 2 "spu_logical_operand" "r,c")))] | |
2028 "" | |
2029 "@ | |
2030 xor\t%0,%1,%2 | |
2031 xor%k2i\t%0,%1,%K2") | |
2032 | |
;; TImode exclusive OR: register alternative, or a constant accepted by
;; constraint "Y" printed through the %m / %L operand modifiers.
2033 (define_insn "xorti3" | |
2034 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2035 (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2036 (match_operand:TI 2 "spu_logical_operand" "r,Y")))] | |
2037 "" | |
2038 "@ | |
2039 xor\t%0,%1,%2 | |
2040 xor%m2i\t%0,%1,%L2") | |
2041 | |
;; Equivalence (XNOR): operand 0 = NOT (operand 1 XOR operand 2), for
;; each mode of the ALL iterator.
2042 (define_insn "eqv_<mode>" | |
2043 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
2044 (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r") | |
2045 (match_operand:ALL 2 "spu_reg_operand" "r"))))] | |
2046 "" | |
2047 "eqv\t%0,%1,%2") | |
2048 | |
2049 ;; one_cmpl | |
2050 | |
;; Bitwise complement for each mode of the ALL iterator, emitted as a
;; "nor" of operand 1 with itself.
2051 (define_insn "one_cmpl<mode>2" | |
2052 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
2053 (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))] | |
2054 "" | |
2055 "nor\t%0,%1,%1") | |
2056 | |
2057 | |
2058 ;; selb | |
2059 | |
;; Mode-independent expander for the select-bits operation.  The
;; pattern itself carries no modes, so the preparation code builds the
;; insn through gen__selb and then stamps the mode of operand 0 onto
;; the UNSPEC_SELB source before emitting it.
2060 (define_expand "selb" | |
2061 [(set (match_operand 0 "spu_reg_operand" "") | |
2062 (unspec [(match_operand 1 "spu_reg_operand" "") | |
2063 (match_operand 2 "spu_reg_operand" "") | |
2064 (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))] | |
2065 "" | |
2066 { | |
2067 rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]); | |
/* Give the mode-less unspec the mode of the destination.  */
2068 PUT_MODE (SET_SRC (s), GET_MODE (operands[0])); | |
2069 emit_insn (s); | |
2070 DONE; | |
2071 }) | |
2072 | |
2073 ;; This could be defined as a combination of logical operations, but at | |
2074 ;; one time it caused a crash due to recursive expansion of rtl during CSE. | |
;; The "selb" instruction proper.  Operands are mode-less in the
;; pattern; the insn condition requires operands 0, 1 and 2 to share one
;; mode.  The mode of operand 3 (the selector) is not checked.
2075 (define_insn "_selb" | |
2076 [(set (match_operand 0 "spu_reg_operand" "=r") | |
2077 (unspec [(match_operand 1 "spu_reg_operand" "r") | |
2078 (match_operand 2 "spu_reg_operand" "r") | |
2079 (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))] | |
2080 "GET_MODE(operands[0]) == GET_MODE(operands[1]) | |
2081 && GET_MODE(operands[1]) == GET_MODE(operands[2])" | |
2082 "selb\t%0,%1,%2,%3") | |
2083 | |
2084 | |
2085 ;; Misc. byte/bit operations | |
2086 ;; clz/ctz/ffs/popcount/parity | |
2087 ;; cntb/sumb | |
2088 | |
;; Count leading zeros for each mode of the VSI iterator; a single
;; "clz" instruction.
2089 (define_insn "clz<mode>2" | |
2090 [(set (match_operand:VSI 0 "spu_reg_operand" "=r") | |
2091 (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))] | |
2092 "" | |
2093 "clz\t%0,%1") | |
2094 | |
;; Count trailing zeros, expanded as 31 - clz (x & -x):
;;   operand 2 = -x, operand 3 = x & operand 2, operand 4 = clz (operand 3),
;;   result    = operand 5 - operand 4, where operand 5 is the constant 31.
;; Operands 2-4 are fresh pseudo registers created in the preparation code.
2095 (define_expand "ctz<mode>2" | |
2096 [(set (match_dup 2) | |
2097 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" ""))) | |
2098 (set (match_dup 3) (and:VSI (match_dup 1) | |
2099 (match_dup 2))) | |
2100 (set (match_dup 4) (clz:VSI (match_dup 3))) | |
2101 (set (match_operand:VSI 0 "spu_reg_operand" "") | |
2102 (minus:VSI (match_dup 5) (match_dup 4)))] | |
2103 "" | |
2104 { | |
2105 operands[2] = gen_reg_rtx (<MODE>mode); | |
2106 operands[3] = gen_reg_rtx (<MODE>mode); | |
2107 operands[4] = gen_reg_rtx (<MODE>mode); | |
2108 operands[5] = spu_const(<MODE>mode, 31); | |
2109 }) | |
2110 | |
;; Find first set bit, expanded as 32 - clz (x & -x).  The RTL sequence
;; is identical to ctz<mode>2; only the constant in operand 5 differs
;; (32 here instead of 31).
2111 (define_expand "ffs<mode>2" | |
2112 [(set (match_dup 2) | |
2113 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" ""))) | |
2114 (set (match_dup 3) (and:VSI (match_dup 1) | |
2115 (match_dup 2))) | |
2116 (set (match_dup 4) (clz:VSI (match_dup 3))) | |
2117 (set (match_operand:VSI 0 "spu_reg_operand" "") | |
2118 (minus:VSI (match_dup 5) (match_dup 4)))] | |
2119 "" | |
2120 { | |
2121 operands[2] = gen_reg_rtx (<MODE>mode); | |
2122 operands[3] = gen_reg_rtx (<MODE>mode); | |
2123 operands[4] = gen_reg_rtx (<MODE>mode); | |
2124 operands[5] = spu_const(<MODE>mode, 32); | |
2125 }) | |
2126 | |
;; SImode population count, expanded into three steps:
;;   operand 2 (SI temp) = UNSPEC_CNTB of the input  (see cntb_si),
;;   operand 3 (HI temp) = UNSPEC_SUMB of operand 2  (see sumb_si),
;;   operand 0           = sign extension of operand 3 to SImode.
2127 (define_expand "popcountsi2" | |
2128 [(set (match_dup 2) | |
2129 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")] | |
2130 UNSPEC_CNTB)) | |
2131 (set (match_dup 3) | |
2132 (unspec:HI [(match_dup 2)] UNSPEC_SUMB)) | |
2133 (set (match_operand:SI 0 "spu_reg_operand" "") | |
2134 (sign_extend:SI (match_dup 3)))] | |
2135 "" | |
2136 { | |
2137 operands[2] = gen_reg_rtx (SImode); | |
2138 operands[3] = gen_reg_rtx (HImode); | |
2139 }) | |
2140 | |
;; SImode parity, computed as the low bit of the population count:
;; popcountsi2 into a fresh temporary, then AND with 1.
2141 (define_expand "paritysi2" | |
2142 [(set (match_operand:SI 0 "spu_reg_operand" "") | |
2143 (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))] | |
2144 "" | |
2145 { | |
2146 operands[2] = gen_reg_rtx (SImode); | |
2147 emit_insn (gen_popcountsi2(operands[2], operands[1])); | |
2148 emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1))); | |
2149 DONE; | |
2150 }) | |
2151 | |
;; "cntb" on an SImode register, modelled as UNSPEC_CNTB.  Used as the
;; first step of popcountsi2.  Scheduled as type "fxb".
2152 (define_insn "cntb_si" | |
2153 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
2154 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")] | |
2155 UNSPEC_CNTB))] | |
2156 "" | |
2157 "cntb\t%0,%1" | |
2158 [(set_attr "type" "fxb")]) | |
2159 | |
;; "cntb" on a V16QImode register; same UNSPEC_CNTB modelling and
;; output template as cntb_si.
2160 (define_insn "cntb_v16qi" | |
2161 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
2162 (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")] | |
2163 UNSPEC_CNTB))] | |
2164 "" | |
2165 "cntb\t%0,%1" | |
2166 [(set_attr "type" "fxb")]) | |
2167 | |
;; "sumb" with the same SImode register given as both source operands,
;; producing an HImode result modelled as UNSPEC_SUMB.  Used as the
;; second step of popcountsi2.
2168 (define_insn "sumb_si" | |
2169 [(set (match_operand:HI 0 "spu_reg_operand" "=r") | |
2170 (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))] | |
2171 "" | |
2172 "sumb\t%0,%1,%1" | |
2173 [(set_attr "type" "fxb")]) | |
2174 | |
2175 | |
2176 ;; ashl, vashl | |
2177 | |
;; Left shift for each mode of the VHSI iterator.  Alternative 0 shifts
;; by a register, alternative 1 by a constant accepted by constraint
;; "W", printed through the <umask> operand modifier.  The <bh> mode
;; attribute supplies the mnemonic suffix.
2178 (define_insn "<v>ashl<mode>3" | |
2179 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2180 (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2181 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))] | |
2182 "" | |
2183 "@ | |
2184 shl<bh>\t%0,%1,%2 | |
2185 shl<bh>i\t%0,%1,%<umask>2" | |
2186 [(set_attr "type" "fx3")]) | |
2187 | |
;; DImode left shift, emitted as "#" and split after reload.  The split
;; works on TImode views of the source and destination registers (same
;; register numbers).  The SImode scratch (operand 3) is only needed by
;; the register-count alternative; the constant alternative uses "X".
2188 (define_insn_and_split "ashldi3" | |
2189 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") | |
2190 (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r") | |
2191 (match_operand:SI 2 "spu_nonmem_operand" "r,I"))) | |
2192 (clobber (match_scratch:SI 3 "=&r,X"))] | |
2193 "" | |
2194 "#" | |
2195 "reload_completed" | |
2196 [(set (match_dup:DI 0) | |
2197 (ashift:DI (match_dup:DI 1) | |
2198 (match_dup:SI 2)))] | |
2199 { | |
2200 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0])); | |
2201 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); | |
2202 rtx op2 = operands[2]; | |
2203 rtx op3 = operands[3]; | |
2204 | |
2205 if (GET_CODE (operands[2]) == REG) | |
2206 { | |
/* Variable count: op3 = count + 64, rotate the quadword by 64 bits,
   then shift left by op3 using the byte-granular and bit-granular
   quadword shifts.  */
2207 emit_insn (gen_addsi3 (op3, op2, GEN_INT (64))); | |
2208 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64))); | |
2209 emit_insn (gen_shlqbybi_ti (op0, op0, op3)); | |
2210 emit_insn (gen_shlqbi_ti (op0, op0, op3)); | |
2211 } | |
2212 else | |
2213 { | |
/* Constant count: same rotate, then a byte shift of val / 8 + 8 and,
   when val is not a multiple of 8, a bit shift of val % 8.  */
2214 HOST_WIDE_INT val = INTVAL (operands[2]); | |
2215 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64))); | |
2216 emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8))); | |
2217 if (val % 8) | |
2218 emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8))); | |
2219 } | |
2220 DONE; | |
2221 }) | |
2222 | |
;; TImode left shift expander.  A CONST_INT count is handed to
;; ashlti3_imm and expansion stops there.  Otherwise operand 3 becomes a
;; fresh TImode pseudo and the parallel (set plus clobber of operand 3)
;; is emitted, which has the shape of ashlti3_reg.
2223 (define_expand "ashlti3" | |
2224 [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "") | |
2225 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "") | |
2226 (match_operand:SI 2 "spu_nonmem_operand" ""))) | |
2227 (clobber (match_dup:TI 3))])] | |
2228 "" | |
2229 "if (GET_CODE (operands[2]) == CONST_INT) | |
2230 { | |
2231 emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2])); | |
2232 DONE; | |
2233 } | |
2234 operands[3] = gen_reg_rtx (TImode);") | |
2235 | |
;; TImode left shift by a constant.  Constants accepted by constraint
;; "O" are emitted as a single shlqbyi and those accepted by "P" as a
;; single shlqbii.  Any other constant is split into two shifts of the
;; same form: first by val & 7, then by val & -8.
2236 (define_insn_and_split "ashlti3_imm" | |
2237 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2238 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2239 (match_operand:SI 2 "immediate_operand" "O,P")))] | |
2240 "" | |
2241 "@ | |
2242 shlqbyi\t%0,%1,%h2 | |
2243 shlqbii\t%0,%1,%e2" | |
2244 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])" | |
2245 [(set (match_dup:TI 0) | |
2246 (ashift:TI (match_dup:TI 1) | |
2247 (match_dup:SI 3))) | |
2248 (set (match_dup:TI 0) | |
2249 (ashift:TI (match_dup:TI 0) | |
2250 (match_dup:SI 4)))] | |
2251 { | |
/* operands[3] is the sub-byte part of the count, operands[4] the
   remainder rounded down to a multiple of eight.  */
2252 HOST_WIDE_INT val = INTVAL(operands[2]); | |
2253 operands[3] = GEN_INT (val&7); | |
2254 operands[4] = GEN_INT (val&-8); | |
2255 } | |
2256 [(set_attr "type" "shuf,shuf")]) | |
2257 | |
;; TImode left shift by a register count.  Emitted as "#" and split
;; with an empty split condition into two insns: a shift of operand 1
;; by (count & 7) into the TImode temporary operand 3, then a shift of
;; operand 3 by (count & -8) into operand 0.  These are the RTL shapes
;; of shlqbi_ti and shlqbybi_ti respectively.
2258 (define_insn_and_split "ashlti3_reg" | |
2259 [(set (match_operand:TI 0 "spu_reg_operand" "=r") | |
2260 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r") | |
2261 (match_operand:SI 2 "spu_reg_operand" "r"))) | |
2262 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))] | |
2263 "" | |
2264 "#" | |
2265 "" | |
2266 [(set (match_dup:TI 3) | |
2267 (ashift:TI (match_dup:TI 1) | |
2268 (and:SI (match_dup:SI 2) | |
2269 (const_int 7)))) | |
2270 (set (match_dup:TI 0) | |
2271 (ashift:TI (match_dup:TI 3) | |
2272 (and:SI (match_dup:SI 2) | |
2273 (const_int -8))))] | |
2274 "") | |
2275 | |
;; TImode left shift by (count & -8), i.e. by the count with its low
;; three bits masked off.  A register count emits shlqbybi; an "I"
;; constant emits shlqbyi with the value printed through %h.
2276 (define_insn "shlqbybi_ti" | |
2277 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2278 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2279 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2280 (const_int -8))))] | |
2281 "" | |
2282 "@ | |
2283 shlqbybi\t%0,%1,%2 | |
2284 shlqbyi\t%0,%1,%h2" | |
2285 [(set_attr "type" "shuf,shuf")]) | |
2286 | |
;; TImode left shift by (count & 7), i.e. by the low three bits of the
;; count.  A register count emits shlqbi; an "I" constant emits shlqbii
;; with the value printed through %e.
2287 (define_insn "shlqbi_ti" | |
2288 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2289 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2290 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2291 (const_int 7))))] | |
2292 "" | |
2293 "@ | |
2294 shlqbi\t%0,%1,%2 | |
2295 shlqbii\t%0,%1,%e2" | |
2296 [(set_attr "type" "shuf,shuf")]) | |
2297 | |
;; TImode left shift by (count * 8), i.e. operand 2 is a byte count.
;; A register count emits shlqby; an "I" constant emits shlqbyi with
;; the value printed through %f.
2298 (define_insn "shlqby_ti" | |
2299 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2300 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2301 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2302 (const_int 8))))] | |
2303 "" | |
2304 "@ | |
2305 shlqby\t%0,%1,%2 | |
2306 shlqbyi\t%0,%1,%f2" | |
2307 [(set_attr "type" "shuf,shuf")]) | |
2308 | |
2309 | |
2310 ;; lshr, vlshr | |
2311 | |
;; Logical right shift for each mode of the VHSI iterator.  The
;; constant alternative ("W") is emitted directly as rot<bh>mi with a
;; negated immediate.  The register alternative is emitted as "#" and,
;; after reload, split into a negate of the count into scratch operand 3
;; followed by a shift by (neg operand 3), the shape of rotm_<mode>.
2312 (define_insn_and_split "<v>lshr<mode>3" | |
2313 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2314 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2315 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))) | |
2316 (clobber (match_scratch:VHSI 3 "=&r,X"))] | |
2317 "" | |
2318 "@ | |
2319 # | |
2320 rot<bh>mi\t%0,%1,-%<umask>2" | |
2321 "reload_completed && GET_CODE (operands[2]) == REG" | |
2322 [(set (match_dup:VHSI 3) | |
2323 (neg:VHSI (match_dup:VHSI 2))) | |
2324 (set (match_dup:VHSI 0) | |
2325 (lshiftrt:VHSI (match_dup:VHSI 1) | |
2326 (neg:VHSI (match_dup:VHSI 3))))] | |
2327 "" | |
2328 [(set_attr "type" "*,fx3")]) | |
2329 | |
2330 | |
;; Logical right shift whose RTL count is (neg operand 2): operand 2
;; holds the already-negated count.  A register emits rot<bh>m; a "W"
;; constant emits rot<bh>mi with the value printed through <nmask>.
2331 (define_insn "rotm_<mode>" | |
2332 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2333 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2334 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))] | |
2335 "" | |
2336 "@ | |
2337 rot<bh>m\t%0,%1,%2 | |
2338 rot<bh>mi\t%0,%1,-%<nmask>2" | |
2339 [(set_attr "type" "fx3")]) | |
2340 | |
;; Logical right shift expander for each mode of the DTI iterator.
;; A CONST_INT count is handed to lshr<mode>3_imm and expansion stops
;; there.  Otherwise three fresh pseudos are created (operand 3 in the
;; shifted mode, operands 4 and 5 in SImode) and the parallel with
;; three clobbers is emitted, the shape of lshr<mode>3_reg.
2341 (define_expand "lshr<mode>3" | |
2342 [(parallel [(set (match_operand:DTI 0 "spu_reg_operand" "") | |
2343 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "") | |
2344 (match_operand:SI 2 "spu_nonmem_operand" ""))) | |
2345 (clobber (match_dup:DTI 3)) | |
2346 (clobber (match_dup:SI 4)) | |
2347 (clobber (match_dup:SI 5))])] | |
2348 "" | |
2349 "if (GET_CODE (operands[2]) == CONST_INT) | |
2350 { | |
2351 emit_insn (gen_lshr<mode>3_imm(operands[0], operands[1], operands[2])); | |
2352 DONE; | |
2353 } | |
2354 operands[3] = gen_reg_rtx (<MODE>mode); | |
2355 operands[4] = gen_reg_rtx (SImode); | |
2356 operands[5] = gen_reg_rtx (SImode);") | |
2357 | |
;; Logical right shift by a constant for each mode of the DTI iterator.
;; Constants accepted by "O" are emitted as a single rotqmbyi and those
;; accepted by "P" as a single rotqmbii, both with a negated immediate.
;; Any other constant is split into two shifts of the same form: first
;; by val & 7, then by val & -8.
2358 (define_insn_and_split "lshr<mode>3_imm" | |
2359 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2360 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2361 (match_operand:SI 2 "immediate_operand" "O,P")))] | |
2362 "" | |
2363 "@ | |
2364 rotqmbyi\t%0,%1,-%h2 | |
2365 rotqmbii\t%0,%1,-%e2" | |
2366 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])" | |
2367 [(set (match_dup:DTI 0) | |
2368 (lshiftrt:DTI (match_dup:DTI 1) | |
2369 (match_dup:SI 4))) | |
2370 (set (match_dup:DTI 0) | |
2371 (lshiftrt:DTI (match_dup:DTI 0) | |
2372 (match_dup:SI 5)))] | |
2373 { | |
/* operands[4] is the sub-byte part of the count, operands[5] the
   remainder rounded down to a multiple of eight.  */
2374 HOST_WIDE_INT val = INTVAL(operands[2]); | |
2375 operands[4] = GEN_INT (val&7); | |
2376 operands[5] = GEN_INT (val&-8); | |
2377 } | |
2378 [(set_attr "type" "shuf,shuf")]) | |
2379 | |
;; Logical right shift by a register count for each mode of the DTI
;; iterator.  Emitted as "#" and split with an empty split condition.
;; The preparation code first computes operand 4 = 0 - count and
;; operand 5 = 7 - count; the two split insns then have the RTL shapes
;; of rotqmbi_<mode> (using operand 4, result in temporary operand 3)
;; and rotqmbybi_<mode> (using operand 5, result in operand 0).
2380 (define_insn_and_split "lshr<mode>3_reg" | |
2381 [(set (match_operand:DTI 0 "spu_reg_operand" "=r") | |
2382 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r") | |
2383 (match_operand:SI 2 "spu_reg_operand" "r"))) | |
2384 (clobber (match_operand:DTI 3 "spu_reg_operand" "=&r")) | |
2385 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r")) | |
2386 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))] | |
2387 "" | |
2388 "#" | |
2389 "" | |
2390 [(set (match_dup:DTI 3) | |
2391 (lshiftrt:DTI (match_dup:DTI 1) | |
2392 (and:SI (neg:SI (match_dup:SI 4)) | |
2393 (const_int 7)))) | |
2394 (set (match_dup:DTI 0) | |
2395 (lshiftrt:DTI (match_dup:DTI 3) | |
2396 (and:SI (neg:SI (and:SI (match_dup:SI 5) | |
2397 (const_int -8))) | |
2398 (const_int -8))))] | |
2399 { | |
/* These two subtractions are emitted ahead of the split insns.  */
2400 emit_insn (gen_subsi3(operands[4], GEN_INT(0), operands[2])); | |
2401 emit_insn (gen_subsi3(operands[5], GEN_INT(7), operands[2])); | |
2402 }) | |
2403 | |
;; Logical right shift by (count & -8) expressed with a non-negated
;; count.  Emitted as "#" and split after reload into the RTL shape of
;; rotqmbybi_<mode>, with operand 3 = 7 - count.  For a constant count
;; operand 3 becomes a constant; for a register count the subtraction
;; is emitted into the SImode scratch.
2404 (define_insn_and_split "shrqbybi_<mode>" | |
2405 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2406 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2407 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2408 (const_int -8)))) | |
2409 (clobber (match_scratch:SI 3 "=&r,X"))] | |
2410 "" | |
2411 "#" | |
2412 "reload_completed" | |
2413 [(set (match_dup:DTI 0) | |
2414 (lshiftrt:DTI (match_dup:DTI 1) | |
2415 (and:SI (neg:SI (and:SI (match_dup:SI 3) (const_int -8))) | |
2416 (const_int -8))))] | |
2417 { | |
2418 if (GET_CODE (operands[2]) == CONST_INT) | |
2419 operands[3] = GEN_INT (7 - INTVAL (operands[2])); | |
2420 else | |
2421 emit_insn (gen_subsi3 (operands[3], GEN_INT (7), operands[2])); | |
2422 } | |
2423 [(set_attr "type" "shuf")]) | |
2424 | |
;; Logical right shift whose RTL count is
;; (and (neg (and operand 2 -8)) -8).  A register operand emits
;; rotqmbybi; an "I" constant emits rotqmbyi with a negated value
;; printed through %H.
2425 (define_insn "rotqmbybi_<mode>" | |
2426 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2427 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2428 (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2429 (const_int -8))) | |
2430 (const_int -8))))] | |
2431 "" | |
2432 "@ | |
2433 rotqmbybi\t%0,%1,%2 | |
2434 rotqmbyi\t%0,%1,-%H2" | |
2435 [(set_attr "type" "shuf")]) | |
2436 | |
;; Logical right shift by (count & 7) expressed with a non-negated
;; count.  Emitted as "#" and split after reload into the RTL shape of
;; rotqmbi_<mode>, with operand 3 = -count (a constant for a constant
;; count, otherwise computed into the SImode scratch).
2437 (define_insn_and_split "shrqbi_<mode>" | |
2438 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2439 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2440 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2441 (const_int 7)))) | |
2442 (clobber (match_scratch:SI 3 "=&r,X"))] | |
2443 "" | |
2444 "#" | |
2445 "reload_completed" | |
2446 [(set (match_dup:DTI 0) | |
2447 (lshiftrt:DTI (match_dup:DTI 1) | |
2448 (and:SI (neg:SI (match_dup:SI 3)) (const_int 7))))] | |
2449 { | |
2450 if (GET_CODE (operands[2]) == CONST_INT) | |
2451 operands[3] = GEN_INT (-INTVAL (operands[2])); | |
2452 else | |
2453 emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2])); | |
2454 } | |
2455 [(set_attr "type" "shuf")]) | |
2456 | |
;; Logical right shift whose RTL count is (and (neg operand 2) 7).
;; A register operand emits rotqmbi; an "I" constant emits rotqmbii
;; with a negated value printed through %E.
2457 (define_insn "rotqmbi_<mode>" | |
2458 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2459 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2460 (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")) | |
2461 (const_int 7))))] | |
2462 "" | |
2463 "@ | |
2464 rotqmbi\t%0,%1,%2 | |
2465 rotqmbii\t%0,%1,-%E2" | |
2466 [(set_attr "type" "shuf")]) | |
2467 | |
;; Logical right shift by (count * 8), i.e. by a byte count, expressed
;; with a non-negated count.  Emitted as "#" and split after reload
;; into the RTL shape of rotqmby_<mode>, with operand 3 = -count (a
;; constant for a constant count, otherwise computed into the scratch).
2468 (define_insn_and_split "shrqby_<mode>" | |
2469 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2470 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2471 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2472 (const_int 8)))) | |
2473 (clobber (match_scratch:SI 3 "=&r,X"))] | |
2474 "" | |
2475 "#" | |
2476 "reload_completed" | |
2477 [(set (match_dup:DTI 0) | |
2478 (lshiftrt:DTI (match_dup:DTI 1) | |
2479 (mult:SI (neg:SI (match_dup:SI 3)) (const_int 8))))] | |
2480 { | |
2481 if (GET_CODE (operands[2]) == CONST_INT) | |
2482 operands[3] = GEN_INT (-INTVAL (operands[2])); | |
2483 else | |
2484 emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2])); | |
2485 } | |
2486 [(set_attr "type" "shuf")]) | |
2487 | |
;; Logical right shift whose RTL count is (mult (neg operand 2) 8).
;; A register operand emits rotqmby; an "I" constant emits rotqmbyi
;; with a negated value printed through %F.
2488 (define_insn "rotqmby_<mode>" | |
2489 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") | |
2490 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") | |
2491 (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")) | |
2492 (const_int 8))))] | |
2493 "" | |
2494 "@ | |
2495 rotqmby\t%0,%1,%2 | |
2496 rotqmbyi\t%0,%1,-%F2" | |
2497 [(set_attr "type" "shuf")]) | |
2498 | |
2499 | |
2500 ;; ashr, vashr | |
2501 | |
;; Arithmetic right shift for each mode of the VHSI iterator.  Same
;; structure as <v>lshr<mode>3: the constant alternative ("W") is
;; emitted directly as rotma<bh>i with a negated immediate, while the
;; register alternative is split after reload into a negate of the
;; count followed by a shift by (neg operand 3), the shape of
;; rotma_<mode>.
2502 (define_insn_and_split "<v>ashr<mode>3" | |
2503 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2504 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2505 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))) | |
2506 (clobber (match_scratch:VHSI 3 "=&r,X"))] | |
2507 "" | |
2508 "@ | |
2509 # | |
2510 rotma<bh>i\t%0,%1,-%<umask>2" | |
2511 "reload_completed && GET_CODE (operands[2]) == REG" | |
2512 [(set (match_dup:VHSI 3) | |
2513 (neg:VHSI (match_dup:VHSI 2))) | |
2514 (set (match_dup:VHSI 0) | |
2515 (ashiftrt:VHSI (match_dup:VHSI 1) | |
2516 (neg:VHSI (match_dup:VHSI 3))))] | |
2517 "" | |
2518 [(set_attr "type" "*,fx3")]) | |
2519 | |
2520 | |
;; Arithmetic right shift whose RTL count is (neg operand 2): operand 2
;; holds the already-negated count.  A register emits rotma<bh>; a "W"
;; constant emits rotma<bh>i with the value printed through <nmask>.
2521 (define_insn "rotma_<mode>" | |
2522 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2523 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2524 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))] | |
2525 "" | |
2526 "@ | |
2527 rotma<bh>\t%0,%1,%2 | |
2528 rotma<bh>i\t%0,%1,-%<nmask>2" | |
2529 [(set_attr "type" "fx3")]) | |
2530 | |
;; DImode arithmetic right shift.  Emitted as "#" and split after
;; reload, using two TImode scratch registers (operands 3 and 4) and
;; one SImode scratch (operand 5).  The split code works on TImode,
;; V4SImode, V2DImode and SImode views of the same hard registers and
;; distinguishes three cases: a constant count of 63 or more, a
;; constant count of 32 to 62, and everything else (smaller constants
;; and register counts).
2531 (define_insn_and_split "ashrdi3" | |
2532 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") | |
2533 (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r") | |
2534 (match_operand:SI 2 "spu_nonmem_operand" "r,I"))) | |
2535 (clobber (match_scratch:TI 3 "=&r,&r")) | |
2536 (clobber (match_scratch:TI 4 "=&r,&r")) | |
2537 (clobber (match_scratch:SI 5 "=&r,&r"))] | |
2538 "" | |
2539 "#" | |
2540 "reload_completed" | |
2541 [(set (match_dup:DI 0) | |
2542 (ashiftrt:DI (match_dup:DI 1) | |
2543 (match_dup:SI 2)))] | |
2544 { | |
/* Views of the destination and source registers in other modes; all
   share the register number of the corresponding operand.  */
2545 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0])); | |
2546 rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0)); | |
2547 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); | |
2548 rtx op1s = gen_rtx_REG (SImode, REGNO (op1)); | |
2549 rtx op2 = operands[2]; | |
2550 rtx op3 = operands[3]; | |
2551 rtx op4 = operands[4]; | |
2552 rtx op5 = operands[5]; | |
2553 | |
/* Case 1: constant count of 63 or more.  The result is derived from
   the SImode view of the source alone: ashrsi3 by 32, then spu_fsm
   into the V4SImode view of the destination.  */
2554 if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63) | |
2555 { | |
2556 rtx op0s = gen_rtx_REG (SImode, REGNO (op0)); | |
2557 emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32))); | |
2558 emit_insn (gen_spu_fsm (op0v, op0s)); | |
2559 } | |
/* Case 2: constant count of 32 to 62.  Quadword logical shift by 32,
   spu_xswd, then a V4SImode arithmetic shift by the remaining
   val - 32 bits when that is nonzero.  */
2560 else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32) | |
2561 { | |
2562 rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0)); | |
2563 HOST_WIDE_INT val = INTVAL (op2); | |
2564 emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32))); | |
2565 emit_insn (gen_spu_xswd (op0d, op0v)); | |
2566 if (val > 32) | |
2567 emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32))); | |
2568 } | |
/* Case 3: everything else.  op5 = SImode source shifted right
   arithmetically by 31, op3 = spu_fsm of op5, and op4 = a TImode
   mask whose first eight bytes are 0xff.  A selb under that mask
   merges op3 and op1, and the merged quadword is then rotated by the
   negated count.  */
2569 else | |
2570 { | |
2571 rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3)); | |
2572 unsigned char arr[16] = { | |
2573 0xff, 0xff, 0xff, 0xff, | |
2574 0xff, 0xff, 0xff, 0xff, | |
2575 0x00, 0x00, 0x00, 0x00, | |
2576 0x00, 0x00, 0x00, 0x00 | |
2577 }; | |
2578 | |
2579 emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31))); | |
2580 emit_move_insn (op4, array_to_constant (TImode, arr)); | |
2581 emit_insn (gen_spu_fsm (op3v, op5)); | |
2582 | |
2583 if (GET_CODE (operands[2]) == REG) | |
2584 { | |
2585 emit_insn (gen_selb (op4, op3, op1, op4)); | |
2586 emit_insn (gen_negsi2 (op5, op2)); | |
2587 emit_insn (gen_rotqbybi_ti (op0, op4, op5)); | |
2588 emit_insn (gen_rotqbi_ti (op0, op0, op5)); | |
2589 } | |
2590 else | |
2591 { | |
/* val is the negated count here, so it is zero or negative.  */
2592 HOST_WIDE_INT val = -INTVAL (op2); | |
2593 emit_insn (gen_selb (op0, op3, op1, op4)); | |
2594 if ((val - 7) / 8) | |
2595 emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8))); | |
2596 if (val % 8) | |
2597 emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8))); | |
2598 } | |
2599 } | |
2600 DONE; | |
2601 }) | |
2602 | |
2603 | |
;; TImode arithmetic right shift, expanded as
;;   (logical shift right of the input) OR (sign mask shifted left).
;; The sign mask is built from the V4SImode view of the input shifted
;; right arithmetically by 31, passed through fsm_ti, and then shifted
;; left by 128 - count.  The count is forced into a register for the
;; subtraction.
2604 (define_expand "ashrti3" | |
2605 [(set (match_operand:TI 0 "spu_reg_operand" "") | |
2606 (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "") | |
2607 (match_operand:SI 2 "spu_nonmem_operand" "")))] | |
2608 "" | |
2609 { | |
2610 rtx sign_shift = gen_reg_rtx (SImode); | |
2611 rtx sign_mask = gen_reg_rtx (TImode); | |
2612 rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0); | |
2613 rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]); | |
2614 rtx t = gen_reg_rtx (TImode); | |
/* sign_shift = 128 - count.  */
2615 emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2]))); | |
/* Build the sign mask and move it into the vacated high bits.  */
2616 emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31))); | |
2617 emit_insn (gen_fsm_ti (sign_mask, sign_mask)); | |
2618 emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift)); | |
/* Logical shift of the value, then merge in the sign bits.  */
2619 emit_insn (gen_lshrti3 (t, operands[1], operands[2])); | |
2620 emit_insn (gen_iorti3 (operands[0], t, sign_mask)); | |
2621 DONE; | |
2622 }) | |
2623 | |
2624 ;; fsm is used after rotam to replicate the sign across the whole register. | |
;; Emits a single "fsm" instruction from TImode register operand 1 into
;; TImode register operand 0.  The operation is modelled as an unspec
;; (UNSPEC_FSM) rather than as explicit RTL arithmetic.  Type "shuf".
2625 (define_insn "fsm_ti" | |
2626 [(set (match_operand:TI 0 "spu_reg_operand" "=r") | |
2627 (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))] | |
2628 "" | |
2629 "fsm\t%0,%1" | |
2630 [(set_attr "type" "shuf")]) | |
2631 | |
2632 | |
2633 ;; vrotl, rotl | |
2634 | |
;; Rotate-left for every mode of the VHSI iterator.
;;   Alternative 0: rotate count in a register, mnemonic "rot<bh>".
;;   Alternative 1: rotate count is a constant accepted by constraint "W",
;;                  mnemonic "rot<bh>i", count printed through the per-mode
;;                  <umask> operand modifier.
;; Both alternatives are type "fx3".
2635 (define_insn "<v>rotl<mode>3" | |
2636 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") | |
2637 (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") | |
2638 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))] | |
2639 "" | |
2640 "@ | |
2641 rot<bh>\t%0,%1,%2 | |
2642 rot<bh>i\t%0,%1,%<umask>2" | |
2643 [(set_attr "type" "fx3")]) | |
2644 | |
;; TImode rotate-left by an SImode count.  The constraint on operand 2 picks
;; one of four alternatives:
;;   r : rotqbybi followed by rotqbi, both reading the count register
;;       (8 bytes, type multi1).  Operand 0 is earlyclobber ("=&r") in this
;;       alternative because the second instruction still reads operand 2
;;       after operand 0 has been written.
;;   O : a single rotqbyi, count printed with %h (4 bytes, type shuf).
;;   P : a single rotqbii, count printed with %e (4 bytes, type shuf).
;;   I : rotqbyi (%h) followed by rotqbii (%e) (8 bytes, type multi1).
2645 (define_insn "rotlti3" | |
2646 [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r") | |
2647 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r") | |
2648 (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))] | |
2649 "" | |
2650 "@ | |
2651 rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2 | |
2652 rotqbyi\t%0,%1,%h2 | |
2653 rotqbii\t%0,%1,%e2 | |
2654 rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2" | |
2655 [(set_attr "length" "8,4,4,8") | |
2656 (set_attr "type" "multi1,shuf,shuf,multi1")]) | |
2657 | |
;; TImode rotate-left by (operand 2 & -8), i.e. the count with its low three
;; bits cleared.  A register count uses rotqbybi; a constant accepted by
;; constraint "I" uses rotqbyi with the count printed through %h.
;; Both alternatives are type "shuf".
2658 (define_insn "rotqbybi_ti" | |
2659 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2660 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2661 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2662 (const_int -8))))] | |
2663 "" | |
2664 "@ | |
2665 rotqbybi\t%0,%1,%2 | |
2666 rotqbyi\t%0,%1,%h2" | |
2667 [(set_attr "type" "shuf,shuf")]) | |
2668 | |
;; TImode rotate-left by (operand 2 * 8) bits, so operand 2 counts units of
;; eight bits.  A register count uses rotqby; a constant accepted by
;; constraint "I" uses rotqbyi with the count printed through %f.
;; Both alternatives are type "shuf".
2669 (define_insn "rotqby_ti" | |
2670 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2671 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2672 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2673 (const_int 8))))] | |
2674 "" | |
2675 "@ | |
2676 rotqby\t%0,%1,%2 | |
2677 rotqbyi\t%0,%1,%f2" | |
2678 [(set_attr "type" "shuf,shuf")]) | |
2679 | |
;; TImode rotate-left by (operand 2 & 7), i.e. only the low three bits of the
;; count take part.  A register count uses rotqbi; a constant accepted by
;; constraint "I" uses rotqbii with the count printed through %e.
;; Both alternatives are type "shuf".
2680 (define_insn "rotqbi_ti" | |
2681 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") | |
2682 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") | |
2683 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") | |
2684 (const_int 7))))] | |
2685 "" | |
2686 "@ | |
2687 rotqbi\t%0,%1,%2 | |
2688 rotqbii\t%0,%1,%e2" | |
2689 [(set_attr "type" "shuf,shuf")]) | |
2690 | |
2691 | |
2692 ;; struct extract/insert | |
2693 ;; We have to handle MEMs ourselves because GCC will generate invalid | |
2694 ;; SUBREGs if it handles them. We generate better code anyway. | |
2695 | |
;; Signed bit-field extraction (sign_extract) from operand 1 into operand 0.
;; Operands 2 and 3 must be SImode constants.  All code generation is
;; delegated to spu_expand_extv, called with 0 as its second argument; the
;; body then ends with DONE, so the template above is never emitted.
2696 (define_expand "extv" | |
2697 [(set (match_operand 0 "register_operand" "") | |
2698 (sign_extract (match_operand 1 "register_operand" "") | |
2699 (match_operand:SI 2 "const_int_operand" "") | |
2700 (match_operand:SI 3 "const_int_operand" "")))] | |
2701 "" | |
2702 { spu_expand_extv(operands, 0); DONE; }) | |
2703 | |
;; Unsigned bit-field extraction (zero_extract) from operand 1 into
;; operand 0.  Operands 2 and 3 must be SImode constants.  All code
;; generation is delegated to spu_expand_extv, called with 1 as its second
;; argument; the body then ends with DONE, so the template above is never
;; emitted.
2704 (define_expand "extzv" | |
2705 [(set (match_operand 0 "register_operand" "") | |
2706 (zero_extract (match_operand 1 "register_operand" "") | |
2707 (match_operand:SI 2 "const_int_operand" "") | |
2708 (match_operand:SI 3 "const_int_operand" "")))] | |
2709 "" | |
2710 { spu_expand_extv(operands, 1); DONE; }) | |
2711 | |
;; Bit-field insertion: stores operand 3 (register or constant) into the
;; zero_extract of register operand 0 described by the SImode constants in
;; operands 1 and 2.  All code generation is delegated to spu_expand_insv;
;; the body then ends with DONE, so the template above is never emitted.
2712 (define_expand "insv" | |
2713 [(set (zero_extract (match_operand 0 "register_operand" "") | |
2714 (match_operand:SI 1 "const_int_operand" "") | |
2715 (match_operand:SI 2 "const_int_operand" "")) | |
2716 (match_operand 3 "nonmemory_operand" ""))] | |
2717 "" | |
2718 { spu_expand_insv(operands); DONE; }) | |
2719 | |
2720 | |
2721 ;; String/block move insn. | |
2722 ;; Argument 0 is the destination | |
2723 ;; Argument 1 is the source | |
2724 ;; Argument 2 is the length | |
2725 ;; Argument 3 is the alignment | |
2726 | |
;; Block-move expander.  It hands the operand array to
;; spu_expand_block_move: a nonzero return means the move has been expanded
;; (DONE), a zero return makes the expander FAIL.
;; NOTE(review): current GCC documentation lists the block-move standard
;; pattern under a different name than "movstrM" -- confirm that
;; "movstrsi" is still the name looked up by the GCC version this file is
;; built with; otherwise this expander is never reached.
2727 (define_expand "movstrsi" | |
2728 [(parallel [(set (match_operand:BLK 0 "" "") | |
2729 (match_operand:BLK 1 "" "")) | |
2730 (use (match_operand:SI 2 "" "")) | |
2731 (use (match_operand:SI 3 "" ""))])] | |
2732 "" | |
2733 " | |
2734 { | |
2735 if (spu_expand_block_move (operands)) | |
2736 DONE; | |
2737 else | |
2738 FAIL; | |
2739 }") | |
2740 | |
2741 | |
2742 ;; jump | |
2743 | |
;; Jump to the address held in SImode register operand 0, using "bi".
;; Type "br".
2744 (define_insn "indirect_jump" | |
2745 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))] | |
2746 "" | |
2747 "bi\t%0" | |
2748 [(set_attr "type" "br")]) | |
2749 | |
;; Unconditional jump to the label in operand 0, using "br".  Type "br".
2750 (define_insn "jump" | |
2751 [(set (pc) | |
2752 (label_ref (match_operand 0 "" "")))] | |
2753 "" | |
2754 "br\t%0" | |
2755 [(set_attr "type" "br")]) | |
2756 | |
2757 | |
2758 ;; return | |
2759 | |
2760 ;; This is intended for leaf functions that don't save any regs and | |
2761 ;; don't have locals on the stack; that is, for functions that | |
2762 ;; don't change $sp and don't need to save $lr. | |
;; Expander for a bare (return).  It is only enabled while direct_return()
;; is true.  The preparation string is empty, so the (return) template is
;; emitted unchanged.
2763 (define_expand "return" | |
2764 [(return)] | |
2765 "direct_return()" | |
2766 "") | |
2767 | |
2768 ;; used in spu_expand_epilogue to generate return from a function and | |
2769 ;; explicitly set use of $lr. | |
2770 | |
;; Matches (return) unconditionally and prints "bi $lr".  Type "br".
2771 (define_insn "_return" | |
2772 [(return)] | |
2773 "" | |
2774 "bi\t$lr" | |
2775 [(set_attr "type" "br")]) | |
2776 | |
2777 | |
2778 | |
2779 ;; ceq | |
2780 | |
;; Equality compare for every mode of the VQHSI iterator; the result has the
;; same mode as the inputs.
;;   Alternative 0: operand 2 in a register, mnemonic "ceq<bh>".
;;   Alternative 1: operand 2 is a constant accepted by constraint "B",
;;                  mnemonic "ceq<bh>i".
;; No "type" attribute is set here, so the attribute's default applies.
2781 (define_insn "ceq_<mode>" | |
2782 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") | |
2783 (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") | |
2784 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] | |
2785 "" | |
2786 "@ | |
2787 ceq<bh>\t%0,%1,%2 | |
2788 ceq<bh>i\t%0,%1,%2") | |
2789 | |
;; DImode equality producing an SImode result.  The output template is "#",
;; so the insn must be split; the split is enabled once reload_completed
;; is set.
;; The split body views the three hard registers as V4SImode and emits:
;;   1. ceq_v4si  op0 = (op1 == op2), word by word;
;;   2. spu_gb    op0 = gb (op0);
;;   3. cgt_si    operand 0 = (operand 0 > 11).
;; NOTE(review): presumably gb packs one result bit per word, so that a
;; value above 11 means both words belonging to the DImode value compared
;; equal -- confirm against the gb instruction definition.
2790 (define_insn_and_split "ceq_di" | |
2791 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
2792 (eq:SI (match_operand:DI 1 "spu_reg_operand" "r") | |
2793 (match_operand:DI 2 "spu_reg_operand" "r")))] | |
2794 "" | |
2795 "#" | |
2796 "reload_completed" | |
2797 [(set (match_dup:SI 0) | |
2798 (eq:SI (match_dup:DI 1) | |
2799 (match_dup:DI 2)))] | |
2800 { | |
2801 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); | |
2802 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); | |
2803 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); | |
2804 emit_insn (gen_ceq_v4si (op0, op1, op2)); | |
2805 emit_insn (gen_spu_gb (op0, op0)); | |
2806 emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11))); | |
2807 DONE; | |
2808 }) | |
2809 | |
2810 | |
2811 ;; We provide the TI compares for completeness and because some parts of | |
2812 ;; gcc/libgcc use them, even though user code might never see them. | |
;; TImode equality producing an SImode result.  Printed as a fixed
;; three-instruction sequence (12 bytes, type multi0):
;;   ceq  %0,%1,%2   compare
;;   gb   %0,%0      gather the compare result
;;   ceqi %0,%0,15   test the gathered value against 15
;; NOTE(review): presumably 15 corresponds to all four words having compared
;; equal -- confirm against the gb instruction definition.
2813 (define_insn "ceq_ti" | |
2814 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
2815 (eq:SI (match_operand:TI 1 "spu_reg_operand" "r") | |
2816 (match_operand:TI 2 "spu_reg_operand" "r")))] | |
2817 "" | |
2818 "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15" | |
2819 [(set_attr "type" "multi0") | |
2820 (set_attr "length" "12")]) | |
2821 | |
;; Equality compare for every mode of the VSF iterator, printed as "fceq".
;; The result is in the integer mode given by the <F2I> mode attribute.
;; No "type" attribute is set here, so the attribute's default applies.
2822 (define_insn "ceq_<mode>" | |
2823 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r") | |
2824 (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r") | |
2825 (match_operand:VSF 2 "spu_reg_operand" "r")))] | |
2826 "" | |
2827 "fceq\t%0,%1,%2") | |
2828 | |
;; Equality compare of the absolute values of operands 1 and 2 for every
;; mode of the VSF iterator, printed as "fcmeq".  The result is in the
;; integer mode given by the <F2I> mode attribute.
;; No "type" attribute is set here, so the attribute's default applies.
2829 (define_insn "cmeq_<mode>" | |
2830 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r") | |
2831 (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) | |
2832 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] | |
2833 "" | |
2834 "fcmeq\t%0,%1,%2") | |
2835 | |
2836 ;; These implementations skip the NaN and INF checks when | |
2837 ;; compiled with option -ffinite-math-only. | |
;; DFmode equality producing an SImode result.  Operand 2 must satisfy
;; const_zero_operand.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - Load three constants: sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated),
;;     nan_mask (0x7FF00000, 0 repeated) and hihi_promote (the shufb control
;;     word used at the end).
;;   - ra, rb: operands 1 and 2 converted to V4SI with spu_convert.
;;   - biteq = ceq (ra, rb), then biteq &= rot32 (biteq).
;;   - a_abs = ra & sign_mask; b_abs = rb & sign_mask.
;;   - Unless flag_finite_math_only:
;;       a_nan   = clgt (a_abs, nan_mask);
;;       ahi_inf = ceq (a_abs, nan_mask);
;;       a_nan  |= rot32 (a_nan) & ahi_inf.
;;   - iszero = ceq (a_abs | b_abs, 0), then iszero &= rot32 (iszero).
;;   - temp2 = biteq | iszero; unless flag_finite_math_only it is then
;;     combined with a_nan through andc_v4si.
;;   - operand 0 = shufb (temp2, temp2, hihi_promote).
;; Only operand 1 (ra) is examined for NaN; b_abs is used solely in the
;; zero test.
2838 (define_expand "ceq_df" | |
2839 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
2840 (eq:SI (match_operand:DF 1 "spu_reg_operand" "r") | |
2841 (match_operand:DF 2 "const_zero_operand" "i")))] | |
2842 "" | |
2843 { | |
2844 if (spu_arch == PROCESSOR_CELL) | |
2845 { | |
2846 rtx ra = gen_reg_rtx (V4SImode); | |
2847 rtx rb = gen_reg_rtx (V4SImode); | |
2848 rtx temp = gen_reg_rtx (TImode); | |
2849 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
2850 rtx temp2 = gen_reg_rtx (V4SImode); | |
2851 rtx biteq = gen_reg_rtx (V4SImode); | |
2852 rtx ahi_inf = gen_reg_rtx (V4SImode); | |
2853 rtx a_nan = gen_reg_rtx (V4SImode); | |
2854 rtx a_abs = gen_reg_rtx (V4SImode); | |
2855 rtx b_abs = gen_reg_rtx (V4SImode); | |
2856 rtx iszero = gen_reg_rtx (V4SImode); | |
2857 rtx sign_mask = gen_reg_rtx (V4SImode); | |
2858 rtx nan_mask = gen_reg_rtx (V4SImode); | |
2859 rtx hihi_promote = gen_reg_rtx (TImode); | |
2860 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
2861 0x7FFFFFFF, 0xFFFFFFFF); | |
2862 | |
2863 emit_move_insn (sign_mask, pat); | |
2864 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
2865 0x7FF00000, 0x0); | |
2866 emit_move_insn (nan_mask, pat); | |
2867 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, | |
2868 0x08090A0B, 0x18191A1B); | |
2869 emit_move_insn (hihi_promote, pat); | |
2870 | |
2871 emit_insn (gen_spu_convert (ra, operands[1])); | |
2872 emit_insn (gen_spu_convert (rb, operands[2])); | |
2873 emit_insn (gen_ceq_v4si (biteq, ra, rb)); | |
2874 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), | |
2875 GEN_INT (4 * 8))); | |
2876 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); | |
2877 | |
2878 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
2879 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
2880 if (!flag_finite_math_only) | |
2881 { | |
2882 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
2883 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); | |
2884 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
2885 GEN_INT (4 * 8))); | |
2886 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); | |
2887 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
2888 } | |
2889 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs)); | |
2890 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode))); | |
2891 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), | |
2892 GEN_INT (4 * 8))); | |
2893 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); | |
2894 emit_insn (gen_iorv4si3 (temp2, biteq, iszero)); | |
2895 if (!flag_finite_math_only) | |
2896 { | |
2897 emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); | |
2898 } | |
2899 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); | |
2900 DONE; | |
2901 } | |
2902 }) | |
2903 | |
;; Equality compare for every mode of the VDF iterator, printed as "dfceq".
;; Only available when spu_arch == PROCESSOR_CELLEDP.  The result is in the
;; integer mode given by the <DF2I> mode attribute.  Type "fpd".
2904 (define_insn "ceq_<mode>_celledp" | |
2905 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r") | |
2906 (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r") | |
2907 (match_operand:VDF 2 "spu_reg_operand" "r")))] | |
2908 "spu_arch == PROCESSOR_CELLEDP" | |
2909 "dfceq\t%0,%1,%2" | |
2910 [(set_attr "type" "fpd")]) | |
2911 | |
;; Equality compare of the absolute values of operands 1 and 2 for every
;; mode of the VDF iterator, printed as "dfcmeq".  Only available when
;; spu_arch == PROCESSOR_CELLEDP.  The result is in the integer mode given
;; by the <DF2I> mode attribute.  Type "fpd".
2912 (define_insn "cmeq_<mode>_celledp" | |
2913 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r") | |
2914 (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) | |
2915 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] | |
2916 "spu_arch == PROCESSOR_CELLEDP" | |
2917 "dfcmeq\t%0,%1,%2" | |
2918 [(set_attr "type" "fpd")]) | |
2919 | |
;; V2DFmode equality producing a V2DImode result.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - ra, rb: operands 1 and 2 viewed as V4SI through spu_gen_subreg.
;;   - Load sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated), nan_mask
;;     (0x7FF00000, 0 repeated) and hihi_promote (shufb control word).
;;   - biteq = ceq (ra, rb), then biteq &= rot32 (biteq).
;;   - a_abs = ra & sign_mask; b_abs = rb & sign_mask.
;;   - a_nan = clgt (a_abs, nan_mask); ahi_inf = ceq (a_abs, nan_mask);
;;     a_nan |= rot32 (a_nan) & ahi_inf.
;;   - iszero = ceq (a_abs | b_abs, 0), then iszero &= rot32 (iszero).
;;   - temp2 = biteq | iszero, combined with a_nan through andc_v4si.
;;   - operand 0 = shufb (temp2, temp2, hihi_promote).
;; The NaN handling here is unconditional: there is no
;; flag_finite_math_only test in this expander.
2920 (define_expand "ceq_v2df" | |
2921 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
2922 (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") | |
2923 (match_operand:V2DF 2 "spu_reg_operand" "r")))] | |
2924 "" | |
2925 { | |
2926 if (spu_arch == PROCESSOR_CELL) | |
2927 { | |
2928 rtx ra = spu_gen_subreg (V4SImode, operands[1]); | |
2929 rtx rb = spu_gen_subreg (V4SImode, operands[2]); | |
2930 rtx temp = gen_reg_rtx (TImode); | |
2931 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
2932 rtx temp2 = gen_reg_rtx (V4SImode); | |
2933 rtx biteq = gen_reg_rtx (V4SImode); | |
2934 rtx ahi_inf = gen_reg_rtx (V4SImode); | |
2935 rtx a_nan = gen_reg_rtx (V4SImode); | |
2936 rtx a_abs = gen_reg_rtx (V4SImode); | |
2937 rtx b_abs = gen_reg_rtx (V4SImode); | |
2938 rtx iszero = gen_reg_rtx (V4SImode); | |
2939 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
2940 0x7FFFFFFF, 0xFFFFFFFF); | |
2941 rtx sign_mask = gen_reg_rtx (V4SImode); | |
2942 rtx nan_mask = gen_reg_rtx (V4SImode); | |
2943 rtx hihi_promote = gen_reg_rtx (TImode); | |
2944 | |
2945 emit_move_insn (sign_mask, pat); | |
2946 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
2947 0x7FF00000, 0x0); | |
2948 emit_move_insn (nan_mask, pat); | |
2949 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, | |
2950 0x08090A0B, 0x18191A1B); | |
2951 emit_move_insn (hihi_promote, pat); | |
2952 | |
2953 emit_insn (gen_ceq_v4si (biteq, ra, rb)); | |
2954 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), | |
2955 GEN_INT (4 * 8))); | |
2956 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); | |
2957 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
2958 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
2959 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
2960 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); | |
2961 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
2962 GEN_INT (4 * 8))); | |
2963 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); | |
2964 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
2965 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs)); | |
2966 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode))); | |
2967 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), | |
2968 GEN_INT (4 * 8))); | |
2969 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); | |
2970 emit_insn (gen_iorv4si3 (temp2, biteq, iszero)); | |
2971 emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); | |
2972 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); | |
2973 DONE; | |
2974 } | |
2975 }) | |
2976 | |
;; V2DFmode equality of absolute values, producing a V2DImode result.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - ra, rb: operands 1 and 2 viewed as V4SI through spu_gen_subreg.
;;   - Load sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated), nan_mask
;;     (0x7FF00000, 0 repeated) and hihi_promote (shufb control word).
;;   - a_abs = ra & sign_mask; b_abs = rb & sign_mask.
;;   - biteq = ceq (a_abs, b_abs), then biteq &= rot32 (biteq).
;;   - a_nan = clgt (a_abs, nan_mask); ahi_inf = ceq (a_abs, nan_mask);
;;     a_nan |= rot32 (a_nan) & ahi_inf.
;;   - temp2 = biteq combined with a_nan through andc_v4si.
;;   - operand 0 = shufb (temp2, temp2, hihi_promote).
;; There is no separate zero test in this expander; the compare is done on
;; the sign-masked values directly.
2977 (define_expand "cmeq_v2df" | |
2978 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
2979 (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) | |
2980 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] | |
2981 "" | |
2982 { | |
2983 if (spu_arch == PROCESSOR_CELL) | |
2984 { | |
2985 rtx ra = spu_gen_subreg (V4SImode, operands[1]); | |
2986 rtx rb = spu_gen_subreg (V4SImode, operands[2]); | |
2987 rtx temp = gen_reg_rtx (TImode); | |
2988 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
2989 rtx temp2 = gen_reg_rtx (V4SImode); | |
2990 rtx biteq = gen_reg_rtx (V4SImode); | |
2991 rtx ahi_inf = gen_reg_rtx (V4SImode); | |
2992 rtx a_nan = gen_reg_rtx (V4SImode); | |
2993 rtx a_abs = gen_reg_rtx (V4SImode); | |
2994 rtx b_abs = gen_reg_rtx (V4SImode); | |
2995 | |
2996 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
2997 0x7FFFFFFF, 0xFFFFFFFF); | |
2998 rtx sign_mask = gen_reg_rtx (V4SImode); | |
2999 rtx nan_mask = gen_reg_rtx (V4SImode); | |
3000 rtx hihi_promote = gen_reg_rtx (TImode); | |
3001 | |
3002 emit_move_insn (sign_mask, pat); | |
3003 | |
3004 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
3005 0x7FF00000, 0x0); | |
3006 emit_move_insn (nan_mask, pat); | |
3007 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, | |
3008 0x08090A0B, 0x18191A1B); | |
3009 emit_move_insn (hihi_promote, pat); | |
3010 | |
3011 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
3012 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
3013 emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs)); | |
3014 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), | |
3015 GEN_INT (4 * 8))); | |
3016 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); | |
3017 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
3018 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); | |
3019 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
3020 GEN_INT (4 * 8))); | |
3021 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); | |
3022 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
3023 emit_insn (gen_andc_v4si (temp2, biteq, a_nan)); | |
3024 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); | |
3025 DONE; | |
3026 } | |
3027 }) | |
3028 | |
3029 | |
3030 ;; cgt | |
3031 | |
;; Signed greater-than compare (gt) for every mode of the VQHSI iterator;
;; the result has the same mode as the inputs.
;;   Alternative 0: operand 2 in a register, mnemonic "cgt<bh>".
;;   Alternative 1: operand 2 is a constant accepted by constraint "B",
;;                  mnemonic "cgt<bh>i".
;; No "type" attribute is set here, so the attribute's default applies.
3032 (define_insn "cgt_<mode>" | |
3033 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") | |
3034 (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") | |
3035 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] | |
3036 "" | |
3037 "@ | |
3038 cgt<bh>\t%0,%1,%2 | |
3039 cgt<bh>i\t%0,%1,%2") | |
3040 | |
;; Special case of the signed DImode compare: operand 1 > -1, producing an
;; SImode result.  Printed as a single "cgti %0,%1,-1".
;; NOTE(review): a single word-sized cgti presumably suffices because only
;; the most significant word decides whether a DImode value is greater than
;; -1 -- confirm which word of the register the instruction result is read
;; from.
3041 (define_insn "cgt_di_m1" | |
3042 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3043 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r") | |
3044 (const_int -1)))] | |
3045 "" | |
3046 "cgti\t%0,%1,-1") | |
3047 | |
;; Signed DImode greater-than producing an SImode result.  Three V4SImode
;; scratch registers are clobbered (all earlyclobber).  The output template
;; is "#", so the insn must be split; the split is enabled once
;; reload_completed is set.
;; The split body views operands 0-2 as V4SImode hard registers and emits:
;;   1. clgt_v4si  op3 = unsigned (op1 > op2), word by word;
;;   2. ceq_v4si   op4 = (op1 == op2), word by word;
;;   3. cgt_v4si   op5 = signed (op1 > op2), word by word;
;;   4. spu_xswd   op3 (viewed as V2DImode) = xswd (op3);
;;   5. selb       op0 = selb (op5, op3, op4).
;; NOTE(review): presumably step 5 picks the signed high-word result unless
;; the high words are equal, in which case the unsigned low-word result
;; moved by step 4 is used -- confirm against the selb and xswd
;; definitions.
3048 (define_insn_and_split "cgt_di" | |
3049 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3050 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r") | |
3051 (match_operand:DI 2 "spu_reg_operand" "r"))) | |
3052 (clobber (match_scratch:V4SI 3 "=&r")) | |
3053 (clobber (match_scratch:V4SI 4 "=&r")) | |
3054 (clobber (match_scratch:V4SI 5 "=&r"))] | |
3055 "" | |
3056 "#" | |
3057 "reload_completed" | |
3058 [(set (match_dup:SI 0) | |
3059 (gt:SI (match_dup:DI 1) | |
3060 (match_dup:DI 2)))] | |
3061 { | |
3062 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); | |
3063 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); | |
3064 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); | |
3065 rtx op3 = operands[3]; | |
3066 rtx op4 = operands[4]; | |
3067 rtx op5 = operands[5]; | |
3068 rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3])); | |
3069 emit_insn (gen_clgt_v4si (op3, op1, op2)); | |
3070 emit_insn (gen_ceq_v4si (op4, op1, op2)); | |
3071 emit_insn (gen_cgt_v4si (op5, op1, op2)); | |
3072 emit_insn (gen_spu_xswd (op3d, op3)); | |
3073 emit_insn (gen_selb (op0, op5, op3, op4)); | |
3074 DONE; | |
3075 }) | |
3076 | |
;; Signed TImode greater-than producing an SImode result.  Three V4SImode
;; scratch registers are clobbered (all earlyclobber).  Printed as a fixed
;; nine-instruction sequence (36 bytes, type multi0):
;;   - clgt into %4, ceq into %3 and cgt into %5, all on %1 and %2;
;;   - then three rounds of "shlqbyi by 4" followed by selb with %3 as the
;;     select operand: the first two rounds select against %4, the last
;;     round selects against %5.
;; Operand 0 is first written only after all three compares have read
;; operands 1 and 2.
;; NOTE(review): presumably each round propagates the unsigned result of a
;; less significant word upward wherever the more significant words are
;; equal, with the signed compare deciding for the top word -- confirm
;; against the selb definition.
3077 (define_insn "cgt_ti" | |
3078 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3079 (gt:SI (match_operand:TI 1 "spu_reg_operand" "r") | |
3080 (match_operand:TI 2 "spu_reg_operand" "r"))) | |
3081 (clobber (match_scratch:V4SI 3 "=&r")) | |
3082 (clobber (match_scratch:V4SI 4 "=&r")) | |
3083 (clobber (match_scratch:V4SI 5 "=&r"))] | |
3084 "" | |
3085 "clgt\t%4,%1,%2\;\ | |
3086 ceq\t%3,%1,%2\;\ | |
3087 cgt\t%5,%1,%2\;\ | |
3088 shlqbyi\t%0,%4,4\;\ | |
3089 selb\t%0,%4,%0,%3\;\ | |
3090 shlqbyi\t%0,%0,4\;\ | |
3091 selb\t%0,%4,%0,%3\;\ | |
3092 shlqbyi\t%0,%0,4\;\ | |
3093 selb\t%0,%5,%0,%3" | |
3094 [(set_attr "type" "multi0") | |
3095 (set_attr "length" "36")]) | |
3096 | |
;; Greater-than compare for every mode of the VSF iterator, printed as
;; "fcgt".  The result is in the integer mode given by the <F2I> mode
;; attribute.  No "type" attribute is set here, so the attribute's default
;; applies.
3097 (define_insn "cgt_<mode>" | |
3098 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r") | |
3099 (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r") | |
3100 (match_operand:VSF 2 "spu_reg_operand" "r")))] | |
3101 "" | |
3102 "fcgt\t%0,%1,%2") | |
3103 | |
;; Greater-than compare of the absolute values of operands 1 and 2 for every
;; mode of the VSF iterator, printed as "fcmgt".  The result is in the
;; integer mode given by the <F2I> mode attribute.  No "type" attribute is
;; set here, so the attribute's default applies.
3104 (define_insn "cmgt_<mode>" | |
3105 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r") | |
3106 (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) | |
3107 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] | |
3108 "" | |
3109 "fcmgt\t%0,%1,%2") | |
3110 | |
;; DFmode greater-than producing an SImode result.  Operand 2 must satisfy
;; const_zero_operand.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - Load four constants: sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated),
;;     nan_mask (0x7FF00000, 0 repeated) and the two shufb control words
;;     hi_promote and borrow_shuffle.
;;   - ra, rb: operands 1 and 2 converted to V4SI with spu_convert;
;;     a_abs = ra & sign_mask; b_abs = rb & sign_mask.
;;   - Unless flag_finite_math_only, for x in {a, b}:
;;       hi_inf = ceq (x_abs, nan_mask); x_nan = clgt (x_abs, nan_mask);
;;       x_nan |= rot32 (x_nan) & hi_inf; x_nan = shufb by hi_promote;
;;     and finally a_nan |= b_nan.
;;   - For x in {a, b}:
;;       xsel = rx shifted right arithmetically by 31 per word, then shufb
;;              by hi_promote;
;;       xbor = bg (zero, x_abs), shufb by borrow_shuffle, then
;;              sfx (zero, x_abs, xbor);
;;       xbor = selb (x_abs, xbor, xsel).
;;     NOTE(review): this looks like a two-word "0 - |x|" with borrow,
;;     selected when the sign bit of x is set, so that the values can be
;;     ordered with integer compares -- confirm against the bg/sfx
;;     definitions.
;;   - gt_hi = cgt (abor, bbor) (signed); gt_lo = clgt (abor, bbor)
;;     (unsigned); temp2 = ceq (abor, bbor);
;;     temp2 = gt_hi | (temp2 & rot32 (gt_lo)); temp2 = shufb by hi_promote.
;;   - Unless flag_finite_math_only, temp2 is combined with a_nan through
;;     andc_v4si.
;;   - operand 0 = spu_convert (temp2).
3111 (define_expand "cgt_df" | |
3112 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3113 (gt:SI (match_operand:DF 1 "spu_reg_operand" "r") | |
3114 (match_operand:DF 2 "const_zero_operand" "i")))] | |
3115 "" | |
3116 { | |
3117 if (spu_arch == PROCESSOR_CELL) | |
3118 { | |
3119 rtx ra = gen_reg_rtx (V4SImode); | |
3120 rtx rb = gen_reg_rtx (V4SImode); | |
3121 rtx zero = gen_reg_rtx (V4SImode); | |
3122 rtx temp = gen_reg_rtx (TImode); | |
3123 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
3124 rtx temp2 = gen_reg_rtx (V4SImode); | |
3125 rtx hi_inf = gen_reg_rtx (V4SImode); | |
3126 rtx a_nan = gen_reg_rtx (V4SImode); | |
3127 rtx b_nan = gen_reg_rtx (V4SImode); | |
3128 rtx a_abs = gen_reg_rtx (V4SImode); | |
3129 rtx b_abs = gen_reg_rtx (V4SImode); | |
3130 rtx asel = gen_reg_rtx (V4SImode); | |
3131 rtx bsel = gen_reg_rtx (V4SImode); | |
3132 rtx abor = gen_reg_rtx (V4SImode); | |
3133 rtx bbor = gen_reg_rtx (V4SImode); | |
3134 rtx gt_hi = gen_reg_rtx (V4SImode); | |
3135 rtx gt_lo = gen_reg_rtx (V4SImode); | |
3136 rtx sign_mask = gen_reg_rtx (V4SImode); | |
3137 rtx nan_mask = gen_reg_rtx (V4SImode); | |
3138 rtx hi_promote = gen_reg_rtx (TImode); | |
3139 rtx borrow_shuffle = gen_reg_rtx (TImode); | |
3140 | |
3141 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
3142 0x7FFFFFFF, 0xFFFFFFFF); | |
3143 emit_move_insn (sign_mask, pat); | |
3144 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
3145 0x7FF00000, 0x0); | |
3146 emit_move_insn (nan_mask, pat); | |
3147 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, | |
3148 0x08090A0B, 0x08090A0B); | |
3149 emit_move_insn (hi_promote, pat); | |
3150 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0, | |
3151 0x0C0D0E0F, 0xC0C0C0C0); | |
3152 emit_move_insn (borrow_shuffle, pat); | |
3153 | |
3154 emit_insn (gen_spu_convert (ra, operands[1])); | |
3155 emit_insn (gen_spu_convert (rb, operands[2])); | |
3156 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
3157 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
3158 | |
3159 if (!flag_finite_math_only) | |
3160 { | |
3161 /* check if ra is NaN */ | |
3162 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); | |
3163 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
3164 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
3165 GEN_INT (4 * 8))); | |
3166 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3167 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
3168 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); | |
3169 | |
3170 /* check if rb is NaN */ | |
3171 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); | |
3172 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); | |
3173 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), | |
3174 GEN_INT (4 * 8))); | |
3175 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3176 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); | |
3177 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); | |
3178 | |
3179 /* check if ra or rb is NaN */ | |
3180 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); | |
3181 } | |
3182 emit_move_insn (zero, CONST0_RTX (V4SImode)); | |
3183 emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31))); | |
3184 emit_insn (gen_shufb (asel, asel, asel, hi_promote)); | |
3185 emit_insn (gen_bg_v4si (abor, zero, a_abs)); | |
3186 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); | |
3187 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); | |
3188 emit_insn (gen_selb (abor, a_abs, abor, asel)); | |
3189 | |
3190 emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); | |
3191 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); | |
3192 emit_insn (gen_bg_v4si (bbor, zero, b_abs)); | |
3193 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); | |
3194 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); | |
3195 emit_insn (gen_selb (bbor, b_abs, bbor, bsel)); | |
3196 | |
3197 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor)); | |
3198 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor)); | |
3199 emit_insn (gen_ceq_v4si (temp2, abor, bbor)); | |
3200 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), | |
3201 GEN_INT (4 * 8))); | |
3202 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); | |
3203 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); | |
3204 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); | |
3205 if (!flag_finite_math_only) | |
3206 { | |
3207 /* correct for NaNs */ | |
3208 emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); | |
3209 } | |
3210 emit_insn (gen_spu_convert (operands[0], temp2)); | |
3211 DONE; | |
3212 } | |
3213 }) | |
3214 | |
;; Greater-than compare for every mode of the VDF iterator, printed as
;; "dfcgt".  Only available when spu_arch == PROCESSOR_CELLEDP.  The result
;; is in the integer mode given by the <DF2I> mode attribute.  Type "fpd".
3215 (define_insn "cgt_<mode>_celledp" | |
3216 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r") | |
3217 (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r") | |
3218 (match_operand:VDF 2 "spu_reg_operand" "r")))] | |
3219 "spu_arch == PROCESSOR_CELLEDP" | |
3220 "dfcgt\t%0,%1,%2" | |
3221 [(set_attr "type" "fpd")]) | |
3222 | |
;; Greater-than compare of the absolute values of operands 1 and 2 for every
;; mode of the VDF iterator, printed as "dfcmgt".  Only available when
;; spu_arch == PROCESSOR_CELLEDP.  The result is in the integer mode given
;; by the <DF2I> mode attribute.  Type "fpd".
3223 (define_insn "cmgt_<mode>_celledp" | |
3224 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r") | |
3225 (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) | |
3226 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] | |
3227 "spu_arch == PROCESSOR_CELLEDP" | |
3228 "dfcmgt\t%0,%1,%2" | |
3229 [(set_attr "type" "fpd")]) | |
3230 | |
;; V2DFmode greater-than producing a V2DImode result.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - ra, rb: operands 1 and 2 viewed as V4SI through spu_gen_subreg.
;;   - Load sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated), nan_mask
;;     (0x7FF00000, 0 repeated) and the shufb control words hi_promote and
;;     borrow_shuffle.
;;   - For x in {a, b}: x_abs = rx & sign_mask;
;;       hi_inf = ceq (x_abs, nan_mask); x_nan = clgt (x_abs, nan_mask);
;;       x_nan |= rot32 (x_nan) & hi_inf; x_nan = shufb by hi_promote.
;;     Then a_nan |= b_nan.
;;   - For x in {a, b}:
;;       xsel = rx shifted right arithmetically by 31 per word, then shufb
;;              by hi_promote;
;;       xbor = bg (zero, x_abs), shufb by borrow_shuffle, then
;;              sfx (zero, x_abs, xbor);
;;       xbor = selb (x_abs, xbor, xsel).
;;   - gt_hi = cgt (abor, bbor) (signed); gt_lo = clgt (abor, bbor)
;;     (unsigned); temp2 = ceq (abor, bbor);
;;     temp2 = gt_hi | (temp2 & rot32 (gt_lo)); temp2 = shufb by hi_promote;
;;     temp2 is then combined with a_nan through andc_v4si.
;;   - operand 0 = temp2 viewed as V2DImode.
;; The NaN handling here is unconditional: there is no
;; flag_finite_math_only test in this expander.
3231 (define_expand "cgt_v2df" | |
3232 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
3233 (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") | |
3234 (match_operand:V2DF 2 "spu_reg_operand" "r")))] | |
3235 "" | |
3236 { | |
3237 if (spu_arch == PROCESSOR_CELL) | |
3238 { | |
3239 rtx ra = spu_gen_subreg (V4SImode, operands[1]); | |
3240 rtx rb = spu_gen_subreg (V4SImode, operands[2]); | |
3241 rtx zero = gen_reg_rtx (V4SImode); | |
3242 rtx temp = gen_reg_rtx (TImode); | |
3243 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
3244 rtx temp2 = gen_reg_rtx (V4SImode); | |
3245 rtx hi_inf = gen_reg_rtx (V4SImode); | |
3246 rtx a_nan = gen_reg_rtx (V4SImode); | |
3247 rtx b_nan = gen_reg_rtx (V4SImode); | |
3248 rtx a_abs = gen_reg_rtx (V4SImode); | |
3249 rtx b_abs = gen_reg_rtx (V4SImode); | |
3250 rtx asel = gen_reg_rtx (V4SImode); | |
3251 rtx bsel = gen_reg_rtx (V4SImode); | |
3252 rtx abor = gen_reg_rtx (V4SImode); | |
3253 rtx bbor = gen_reg_rtx (V4SImode); | |
3254 rtx gt_hi = gen_reg_rtx (V4SImode); | |
3255 rtx gt_lo = gen_reg_rtx (V4SImode); | |
3256 rtx sign_mask = gen_reg_rtx (V4SImode); | |
3257 rtx nan_mask = gen_reg_rtx (V4SImode); | |
3258 rtx hi_promote = gen_reg_rtx (TImode); | |
3259 rtx borrow_shuffle = gen_reg_rtx (TImode); | |
3260 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
3261 0x7FFFFFFF, 0xFFFFFFFF); | |
3262 emit_move_insn (sign_mask, pat); | |
3263 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
3264 0x7FF00000, 0x0); | |
3265 emit_move_insn (nan_mask, pat); | |
3266 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, | |
3267 0x08090A0B, 0x08090A0B); | |
3268 emit_move_insn (hi_promote, pat); | |
3269 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0, | |
3270 0x0C0D0E0F, 0xC0C0C0C0); | |
3271 emit_move_insn (borrow_shuffle, pat); | |
3272 | |
3273 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
3274 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); | |
3275 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
3276 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
3277 GEN_INT (4 * 8))); | |
3278 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3279 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
3280 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); | |
3281 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
3282 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); | |
3283 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); | |
3284 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), | |
3285 GEN_INT (4 * 8))); | |
3286 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3287 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); | |
3288 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); | |
3289 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); | |
3290 emit_move_insn (zero, CONST0_RTX (V4SImode)); | |
3291 emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31))); | |
3292 emit_insn (gen_shufb (asel, asel, asel, hi_promote)); | |
3293 emit_insn (gen_bg_v4si (abor, zero, a_abs)); | |
3294 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); | |
3295 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); | |
3296 emit_insn (gen_selb (abor, a_abs, abor, asel)); | |
3297 emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); | |
3298 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); | |
3299 emit_insn (gen_bg_v4si (bbor, zero, b_abs)); | |
3300 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); | |
3301 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); | |
3302 emit_insn (gen_selb (bbor, b_abs, bbor, bsel)); | |
3303 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor)); | |
3304 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor)); | |
3305 emit_insn (gen_ceq_v4si (temp2, abor, bbor)); | |
3306 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), | |
3307 GEN_INT (4 * 8))); | |
3308 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); | |
3309 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); | |
3310 | |
3311 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); | |
3312 emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); | |
3313 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); | |
3314 DONE; | |
3315 } | |
3316 }) | |
3317 | |
;; V2DFmode greater-than of absolute values, producing a V2DImode result.
;; When spu_arch is PROCESSOR_CELL the body builds the compare out of V4SI
;; integer operations and ends with DONE.  For any other spu_arch value the
;; body does nothing and the template above is emitted as is.
;; Outline of the PROCESSOR_CELL sequence ("rot32 (x)" below stands for
;; rotlti3 of x by 4 * 8 bits):
;;   - ra, rb: operands 1 and 2 viewed as V4SI through spu_gen_subreg.
;;   - Load sign_mask (0x7FFFFFFF, 0xFFFFFFFF repeated), nan_mask
;;     (0x7FF00000, 0 repeated) and the shufb control word hi_promote.
;;   - For x in {a, b}: x_abs = rx & sign_mask;
;;       hi_inf = ceq (x_abs, nan_mask); x_nan = clgt (x_abs, nan_mask);
;;       x_nan |= rot32 (x_nan) & hi_inf; x_nan = shufb by hi_promote.
;;     Then a_nan |= b_nan.
;;   - gt_hi and gt_lo are both clgt (a_abs, b_abs); the two calls differ
;;     only in their destination register.  temp2 = ceq (a_abs, b_abs);
;;     temp2 = gt_hi | (temp2 & rot32 (gt_lo)); temp2 = shufb by hi_promote;
;;     temp2 is then combined with a_nan through andc_v4si.
;;   - operand 0 = temp2 viewed as V2DImode.
;; The sign-masked values are compared directly with unsigned compares; no
;; bg/sfx sign adjustment is emitted in this expander.
3318 (define_expand "cmgt_v2df" | |
3319 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
3320 (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) | |
3321 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] | |
3322 "" | |
3323 { | |
3324 if (spu_arch == PROCESSOR_CELL) | |
3325 { | |
3326 rtx ra = spu_gen_subreg (V4SImode, operands[1]); | |
3327 rtx rb = spu_gen_subreg (V4SImode, operands[2]); | |
3328 rtx temp = gen_reg_rtx (TImode); | |
3329 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
3330 rtx temp2 = gen_reg_rtx (V4SImode); | |
3331 rtx hi_inf = gen_reg_rtx (V4SImode); | |
3332 rtx a_nan = gen_reg_rtx (V4SImode); | |
3333 rtx b_nan = gen_reg_rtx (V4SImode); | |
3334 rtx a_abs = gen_reg_rtx (V4SImode); | |
3335 rtx b_abs = gen_reg_rtx (V4SImode); | |
3336 rtx gt_hi = gen_reg_rtx (V4SImode); | |
3337 rtx gt_lo = gen_reg_rtx (V4SImode); | |
3338 rtx sign_mask = gen_reg_rtx (V4SImode); | |
3339 rtx nan_mask = gen_reg_rtx (V4SImode); | |
3340 rtx hi_promote = gen_reg_rtx (TImode); | |
3341 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
3342 0x7FFFFFFF, 0xFFFFFFFF); | |
3343 emit_move_insn (sign_mask, pat); | |
3344 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
3345 0x7FF00000, 0x0); | |
3346 emit_move_insn (nan_mask, pat); | |
3347 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, | |
3348 0x08090A0B, 0x08090A0B); | |
3349 emit_move_insn (hi_promote, pat); | |
3350 | |
3351 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); | |
3352 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); | |
3353 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); | |
3354 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), | |
3355 GEN_INT (4 * 8))); | |
3356 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3357 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); | |
3358 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); | |
3359 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); | |
3360 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); | |
3361 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); | |
3362 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), | |
3363 GEN_INT (4 * 8))); | |
3364 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); | |
3365 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); | |
3366 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); | |
3367 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); | |
3368 | |
3369 emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs)); | |
3370 emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs)); | |
3371 emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs)); | |
3372 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), | |
3373 GEN_INT (4 * 8))); | |
3374 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); | |
3375 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); | |
3376 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); | |
3377 emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); | |
3378 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); | |
3379 DONE; | |
3380 } | |
3381 }) | |
3382 | |
3383 | |
3384 ;; clgt | |
3385 | |
;; Unsigned greater-than (gtu) for each mode of the VQHSI iterator.
;; Alternative 0 compares two registers (clgt<bh>); alternative 1 takes a
;; second operand satisfying constraint "B" and uses the clgt<bh>i form.
3386 (define_insn "clgt_<mode>" | |
3387 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") | |
3388 (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") | |
3389 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] | |
3390 "" | |
3391 "@ | |
3392 clgt<bh>\t%0,%1,%2 | |
3393 clgt<bh>i\t%0,%1,%2") | |
3394 | |
;; Unsigned greater-than of two DImode registers with an SImode result.
;; The insn is output as "#" and split once reload_completed holds.  The
;; split body re-views operands 0-2 as V4SImode hard registers and emits,
;; in order: clgt_v4si into scratch 3, ceq_v4si into scratch 4, spu_xswd of
;; scratch 3 into scratch 5 (viewed as V2DImode), and finally selb of
;; scratch 3 / scratch 5 under the ceq mask into operand 0.  Because the
;; body ends with DONE, the replacement template listed after the split
;; condition is not itself emitted.
;; All three scratches are early-clobber ("=&r").
3395 (define_insn_and_split "clgt_di" | |
3396 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3397 (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r") | |
3398 (match_operand:DI 2 "spu_reg_operand" "r"))) | |
3399 (clobber (match_scratch:V4SI 3 "=&r")) | |
3400 (clobber (match_scratch:V4SI 4 "=&r")) | |
3401 (clobber (match_scratch:V4SI 5 "=&r"))] | |
3402 "" | |
3403 "#" | |
3404 "reload_completed" | |
3405 [(set (match_dup:SI 0) | |
3406 (gtu:SI (match_dup:DI 1) | |
3407 (match_dup:DI 2)))] | |
3408 { | |
3409 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); | |
3410 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); | |
3411 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); | |
3412 rtx op3 = operands[3]; | |
3413 rtx op4 = operands[4]; | |
3414 rtx op5 = operands[5]; | |
3415 rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5])); | |
3416 emit_insn (gen_clgt_v4si (op3, op1, op2)); | |
3417 emit_insn (gen_ceq_v4si (op4, op1, op2)); | |
3418 emit_insn (gen_spu_xswd (op5d, op3)); | |
3419 emit_insn (gen_selb (op0, op3, op5, op4)); | |
3420 DONE; | |
3421 }) | |
3422 | |
;; Unsigned greater-than of two TImode registers with an SImode result.
;; Output as one multi-instruction template: ceq into scratch 3 and clgt
;; into scratch 4, followed by three shlqbyi-by-4 / selb steps that
;; accumulate the result in operand 0.  The template holds eight
;; instructions, hence the length of 32 bytes (the default length is 4).
3423 (define_insn "clgt_ti" | |
3424 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
3425 (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r") | |
3426 (match_operand:TI 2 "spu_reg_operand" "r"))) | |
3427 (clobber (match_scratch:V4SI 3 "=&r")) | |
3428 (clobber (match_scratch:V4SI 4 "=&r"))] | |
3429 "" | |
3430 "ceq\t%3,%1,%2\;\ | |
3431 clgt\t%4,%1,%2\;\ | |
3432 shlqbyi\t%0,%4,4\;\ | |
3433 selb\t%0,%4,%0,%3\;\ | |
3434 shlqbyi\t%0,%0,4\;\ | |
3435 selb\t%0,%4,%0,%3\;\ | |
3436 shlqbyi\t%0,%0,4\;\ | |
3437 selb\t%0,%4,%0,%3" | |
3438 [(set_attr "type" "multi0") | |
3439 (set_attr "length" "32")]) | |
3440 | |
3441 | |
3442 ;; dftsv | |
;; The dftsv instruction itself, only available when
;; spu_arch == PROCESSOR_CELLEDP.  Operand 1 is the V2DF input, operand 2
;; a constant integer printed directly into the instruction, and the
;; V2DI result goes to operand 0.
3443 (define_insn "dftsv_celledp" | |
3444 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
3445 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r") | |
3446 (match_operand:SI 2 "const_int_operand" "i")] | |
3447 UNSPEC_DFTSV))] | |
3448 "spu_arch == PROCESSOR_CELLEDP" | |
3449 "dftsv\t%0,%1,%2" | |
3450 [(set_attr "type" "fpd")]) | |
3451 | |
;; Expander for dftsv.  When spu_arch == PROCESSOR_CELL the test is
;; open-coded with V4SI integer operations and the expander finishes with
;; DONE; otherwise the body falls through and the UNSPEC_DFTSV template is
;; emitted as written.
;;
;; In the open-coded path the result starts as all zeros and one mask per
;; requested class is ORed in.  Operand 2 selects the classes; the bits
;; tested by the body are:
;;   0x40 NaN      0x20 +inf     0x10 -inf
;;   0x08 +0       0x04 -0       0x02 +denorm  0x01 -denorm
;; If operand 2 is zero, no test is emitted and the result is zero.
;; "sign" is the input arithmetically shifted right by 31 and "abs" is the
;; input ANDed with sign_mask; shufb with hi_promote is applied to each
;; class mask before it is combined with "sign".
3452 (define_expand "dftsv" | |
3453 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") | |
3454 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r") | |
3455 (match_operand:SI 2 "const_int_operand" "i")] | |
3456 UNSPEC_DFTSV))] | |
3457 "" | |
3458 { | |
3459 if (spu_arch == PROCESSOR_CELL) | |
3460 { | |
3461 rtx result = gen_reg_rtx (V4SImode); | |
3462 emit_move_insn (result, CONST0_RTX (V4SImode)); | |
3463 | |
3464 if (INTVAL (operands[2])) | |
3465 { | |
3466 rtx ra = spu_gen_subreg (V4SImode, operands[1]); | |
3467 rtx abs = gen_reg_rtx (V4SImode); | |
3468 rtx sign = gen_reg_rtx (V4SImode); | |
3469 rtx temp = gen_reg_rtx (TImode); | |
3470 rtx temp_v4si = spu_gen_subreg (V4SImode, temp); | |
3471 rtx temp2 = gen_reg_rtx (V4SImode); | |
3472 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, | |
3473 0x7FFFFFFF, 0xFFFFFFFF); | |
3474 rtx sign_mask = gen_reg_rtx (V4SImode); | |
3475 rtx hi_promote = gen_reg_rtx (TImode); | |
3476 emit_move_insn (sign_mask, pat); | |
3477 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, | |
3478 0x08090A0B, 0x08090A0B); | |
3479 emit_move_insn (hi_promote, pat); | |
3480 | |
3481 emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31))); | |
3482 emit_insn (gen_shufb (sign, sign, sign, hi_promote)); | |
3483 emit_insn (gen_andv4si3 (abs, ra, sign_mask)); | |
3484 | |
3485 /* NaN or +inf or -inf */ | |
3486 if (INTVAL (operands[2]) & 0x70) | |
3487 { | |
3488 rtx nan_mask = gen_reg_rtx (V4SImode); | |
3489 rtx isinf = gen_reg_rtx (V4SImode); | |
3490 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, | |
3491 0x7FF00000, 0x0); | |
3492 emit_move_insn (nan_mask, pat); | |
3493 emit_insn (gen_ceq_v4si (isinf, abs, nan_mask)); | |
3494 | |
3495 /* NaN */ | |
3496 if (INTVAL (operands[2]) & 0x40) | |
3497 { | |
3498 rtx isnan = gen_reg_rtx (V4SImode); | |
3499 emit_insn (gen_clgt_v4si (isnan, abs, nan_mask)); | |
3500 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan), | |
3501 GEN_INT (4 * 8))); | |
3502 emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf)); | |
3503 emit_insn (gen_iorv4si3 (isnan, isnan, temp2)); | |
3504 emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote)); | |
3505 emit_insn (gen_iorv4si3 (result, result, isnan)); | |
3506 } | |
3507 /* +inf or -inf */ | |
3508 if (INTVAL (operands[2]) & 0x30) | |
3509 { | |
3510 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf), | |
3511 GEN_INT (4 * 8))); | |
3512 emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si)); | |
3513 emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote)); | |
3514 | |
3515 /* +inf */ | |
3516 if (INTVAL (operands[2]) & 0x20) | |
3517 { | |
3518 emit_insn (gen_andc_v4si (temp2, isinf, sign)); | |
3519 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3520 } | |
3521 /* -inf */ | |
3522 if (INTVAL (operands[2]) & 0x10) | |
3523 { | |
3524 emit_insn (gen_andv4si3 (temp2, isinf, sign)); | |
3525 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3526 } | |
3527 } | |
3528 } | |
3529 | |
3530 /* 0 or denorm */ | |
3531 if (INTVAL (operands[2]) & 0xF) | |
3532 { | |
3533 rtx iszero = gen_reg_rtx (V4SImode); | |
3534 emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode))); | |
3535 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), | |
3536 GEN_INT (4 * 8))); | |
3537 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); | |
3538 | |
3539 /* denorm */ | |
3540 if (INTVAL (operands[2]) & 0x3) | |
3541 { | |
3542 rtx isdenorm = gen_reg_rtx (V4SImode); | |
3543 rtx denorm_mask = gen_reg_rtx (V4SImode); | |
3544 emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF)); | |
3545 emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask)); | |
3546 emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero)); | |
3547 emit_insn (gen_shufb (isdenorm, isdenorm, | |
3548 isdenorm, hi_promote)); | |
3549 /* +denorm */ | |
3550 if (INTVAL (operands[2]) & 0x2) | |
3551 { | |
3552 emit_insn (gen_andc_v4si (temp2, isdenorm, sign)); | |
3553 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3554 } | |
3555 /* -denorm */ | |
3556 if (INTVAL (operands[2]) & 0x1) | |
3557 { | |
3558 emit_insn (gen_andv4si3 (temp2, isdenorm, sign)); | |
3559 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3560 } | |
3561 } | |
3562 | |
3563 /* 0 */ | |
3564 if (INTVAL (operands[2]) & 0xC) | |
3565 { | |
3566 emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote)); | |
3567 /* +0 */ | |
3568 if (INTVAL (operands[2]) & 0x8) | |
3569 { | |
3570 emit_insn (gen_andc_v4si (temp2, iszero, sign)); | |
3571 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3572 } | |
3573 /* -0 */ | |
3574 if (INTVAL (operands[2]) & 0x4) | |
3575 { | |
3576 emit_insn (gen_andv4si3 (temp2, iszero, sign)); | |
3577 emit_insn (gen_iorv4si3 (result, result, temp2)); | |
3578 } | |
3579 } | |
3580 } | |
3581 } | |
3582 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result)); | |
3583 DONE; | |
3584 } | |
3585 }) | |
3586 | |
3587 | |
3588 ;; branches | |
3589 | |
;; Conditional branch to the label in operand 0.  Operator 1 (a
;; branch_comparison_operator) compares register operand 2 with zero; the
;; label is in the "then" arm.  The %b modifier is applied to both the
;; register and the operator when building the mnemonic.
3590 (define_insn "" | |
3591 [(set (pc) | |
3592 (if_then_else (match_operator 1 "branch_comparison_operator" | |
3593 [(match_operand 2 | |
3594 "spu_reg_operand" "r") | |
3595 (const_int 0)]) | |
3596 (label_ref (match_operand 0 "" "")) | |
3597 (pc)))] | |
3598 "" | |
3599 "br%b2%b1z\t%2,%0" | |
3600 [(set_attr "type" "br")]) | |
3601 | |
;; Conditional return: operator 0 compares register operand 1 with zero
;; and (return) is in the "then" arm.  Only valid when direct_return ()
;; holds; output is an indirect branch through $lr.
3602 (define_insn "" | |
3603 [(set (pc) | |
3604 (if_then_else (match_operator 0 "branch_comparison_operator" | |
3605 [(match_operand 1 | |
3606 "spu_reg_operand" "r") | |
3607 (const_int 0)]) | |
3608 (return) | |
3609 (pc)))] | |
3610 "direct_return ()" | |
3611 "bi%b1%b0z\t%1,$lr" | |
3612 [(set_attr "type" "br")]) | |
3613 | |
;; Conditional branch with the arms swapped: (pc) is the "then" arm and
;; the label in operand 0 is the "else" arm, so the branch is taken when
;; operator 1 does NOT hold.
;; NOTE(review): the output template is character-for-character the same
;; as the one used by the pattern whose label is in the "then" arm.
;; Confirm that the condition printed for operand 1 really accounts for
;; the swapped arms; otherwise this form would branch on the wrong sense.
3614 (define_insn "" | |
3615 [(set (pc) | |
3616 (if_then_else (match_operator 1 "branch_comparison_operator" | |
3617 [(match_operand 2 | |
3618 "spu_reg_operand" "r") | |
3619 (const_int 0)]) | |
3620 (pc) | |
3621 (label_ref (match_operand 0 "" ""))))] | |
3622 "" | |
3623 "br%b2%b1z\t%2,%0" | |
3624 [(set_attr "type" "br")]) | |
3625 | |
;; Conditional return with the arms swapped: (return) is the "else" arm,
;; so the return happens when operator 0 does NOT hold.  Only valid when
;; direct_return () holds.
;; NOTE(review): the output template is identical to the one used by the
;; pattern with (return) in the "then" arm.  Confirm that the printed
;; condition accounts for the swapped arms.
3626 (define_insn "" | |
3627 [(set (pc) | |
3628 (if_then_else (match_operator 0 "branch_comparison_operator" | |
3629 [(match_operand 1 | |
3630 "spu_reg_operand" "r") | |
3631 (const_int 0)]) | |
3632 (pc) | |
3633 (return)))] | |
3634 "direct_return ()" | |
3635 "bi%b1%b0z\t%1,$lr" | |
3636 [(set_attr "type" "br")]) | |
3637 | |
3638 | |
3639 ;; Compare insns are next. Note that the spu has two types of compares, | |
3640 ;; signed & unsigned, and one type of branch. | |
3641 ;; | |
3642 ;; Start with the DEFINE_EXPANDs to generate the rtl for compares, scc | |
3643 ;; insns, and branches. We store the operands of compares until we see | |
3644 ;; how they are used. | |
3645 | |
;; Compare expanders for the VQHSI, DTI and VSF mode iterators and for DF.
;; None of them emits any RTL: each one only records its two operands in
;; spu_compare_op0 / spu_compare_op1 and finishes with DONE.
;; Only the VQHSI variant accepts a non-register second operand
;; (spu_nonmem_operand); the others require registers.
3646 (define_expand "cmp<mode>" | |
3647 [(set (cc0) | |
3648 (compare (match_operand:VQHSI 0 "spu_reg_operand" "") | |
3649 (match_operand:VQHSI 1 "spu_nonmem_operand" "")))] | |
3650 "" | |
3651 { | |
3652 spu_compare_op0 = operands[0]; | |
3653 spu_compare_op1 = operands[1]; | |
3654 DONE; | |
3655 }) | |
3656 | |
3657 (define_expand "cmp<mode>" | |
3658 [(set (cc0) | |
3659 (compare (match_operand:DTI 0 "spu_reg_operand" "") | |
3660 (match_operand:DTI 1 "spu_reg_operand" "")))] | |
3661 "" | |
3662 { | |
3663 spu_compare_op0 = operands[0]; | |
3664 spu_compare_op1 = operands[1]; | |
3665 DONE; | |
3666 }) | |
3667 | |
3668 (define_expand "cmp<mode>" | |
3669 [(set (cc0) | |
3670 (compare (match_operand:VSF 0 "spu_reg_operand" "") | |
3671 (match_operand:VSF 1 "spu_reg_operand" "")))] | |
3672 "" | |
3673 { | |
3674 spu_compare_op0 = operands[0]; | |
3675 spu_compare_op1 = operands[1]; | |
3676 DONE; | |
3677 }) | |
3678 | |
3679 (define_expand "cmpdf" | |
3680 [(set (cc0) | |
3681 (compare (match_operand:DF 0 "register_operand" "") | |
3682 (match_operand:DF 1 "register_operand" "")))] | |
3683 "" | |
3684 "{ | |
3685 spu_compare_op0 = operands[0]; | |
3686 spu_compare_op1 = operands[1]; | |
3687 DONE; | |
3688 }") | |
3689 | |
3690 ;; vector conditional compare patterns | |
;; Vector conditional select for the VCMP modes: operand 0 receives
;; operand 1 or operand 2 depending on comparison operator 3 applied to
;; operands 4 and 5.  All work is delegated to spu_emit_vector_cond_expr;
;; the expander FAILs when that function returns zero.
3691 (define_expand "vcond<mode>" | |
3692 [(set (match_operand:VCMP 0 "spu_reg_operand" "=r") | |
3693 (if_then_else:VCMP | |
3694 (match_operator 3 "comparison_operator" | |
3695 [(match_operand:VCMP 4 "spu_reg_operand" "r") | |
3696 (match_operand:VCMP 5 "spu_reg_operand" "r")]) | |
3697 (match_operand:VCMP 1 "spu_reg_operand" "r") | |
3698 (match_operand:VCMP 2 "spu_reg_operand" "r")))] | |
3699 "" | |
3700 { | |
3701 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], | |
3702 operands[3], operands[4], operands[5])) | |
3703 DONE; | |
3704 else | |
3705 FAIL; | |
3706 }) | |
3707 | |
;; Counterpart of vcond<mode> for the VCMPU mode iterator.  Delegates to
;; the same helper, spu_emit_vector_cond_expr, with the same argument
;; order, and FAILs when it returns zero.
3708 (define_expand "vcondu<mode>" | |
3709 [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r") | |
3710 (if_then_else:VCMPU | |
3711 (match_operator 3 "comparison_operator" | |
3712 [(match_operand:VCMPU 4 "spu_reg_operand" "r") | |
3713 (match_operand:VCMPU 5 "spu_reg_operand" "r")]) | |
3714 (match_operand:VCMPU 1 "spu_reg_operand" "r") | |
3715 (match_operand:VCMPU 2 "spu_reg_operand" "r")))] | |
3716 "" | |
3717 { | |
3718 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], | |
3719 operands[3], operands[4], operands[5])) | |
3720 DONE; | |
3721 else | |
3722 FAIL; | |
3723 }) | |
3724 | |
3725 | |
3726 ;; branch on condition | |
3727 | |
;; The ten conditional-branch expanders (beq, bne, bge, bgt, ble, blt and
;; the unsigned bgeu, bgtu, bleu, bltu).  Each takes the target label as
;; operand 0 and calls spu_emit_branch_or_set with a first argument of 0
;; and the matching rtx comparison code; nothing else is emitted here.
3728 (define_expand "beq" | |
3729 [(use (match_operand 0 "" ""))] | |
3730 "" | |
3731 { spu_emit_branch_or_set (0, EQ, operands); DONE; }) | |
3732 | |
3733 (define_expand "bne" | |
3734 [(use (match_operand 0 "" ""))] | |
3735 "" | |
3736 { spu_emit_branch_or_set (0, NE, operands); DONE; }) | |
3737 | |
3738 (define_expand "bge" | |
3739 [(use (match_operand 0 "" ""))] | |
3740 "" | |
3741 { spu_emit_branch_or_set (0, GE, operands); DONE; }) | |
3742 | |
3743 (define_expand "bgt" | |
3744 [(use (match_operand 0 "" ""))] | |
3745 "" | |
3746 { spu_emit_branch_or_set (0, GT, operands); DONE; }) | |
3747 | |
3748 (define_expand "ble" | |
3749 [(use (match_operand 0 "" ""))] | |
3750 "" | |
3751 { spu_emit_branch_or_set (0, LE, operands); DONE; }) | |
3752 | |
3753 (define_expand "blt" | |
3754 [(use (match_operand 0 "" ""))] | |
3755 "" | |
3756 { spu_emit_branch_or_set (0, LT, operands); DONE; }) | |
3757 | |
3758 (define_expand "bgeu" | |
3759 [(use (match_operand 0 "" ""))] | |
3760 "" | |
3761 { spu_emit_branch_or_set (0, GEU, operands); DONE; }) | |
3762 | |
3763 (define_expand "bgtu" | |
3764 [(use (match_operand 0 "" ""))] | |
3765 "" | |
3766 { spu_emit_branch_or_set (0, GTU, operands); DONE; }) | |
3767 | |
3768 (define_expand "bleu" | |
3769 [(use (match_operand 0 "" ""))] | |
3770 "" | |
3771 { spu_emit_branch_or_set (0, LEU, operands); DONE; }) | |
3772 | |
3773 (define_expand "bltu" | |
3774 [(use (match_operand 0 "" ""))] | |
3775 "" | |
3776 { spu_emit_branch_or_set (0, LTU, operands); DONE; }) | |
3777 | |
3778 | |
3779 ;; set on condition | |
3780 | |
;; The ten set-on-condition expanders (seq, sne, sgt, slt, sge, sle and
;; the unsigned sgtu, sltu, sgeu, sleu).  Operand 0 is the SImode
;; destination register.  Each calls spu_emit_branch_or_set with a first
;; argument of 1 and the matching rtx comparison code.
3781 (define_expand "seq" | |
3782 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3783 "" | |
3784 { spu_emit_branch_or_set (1, EQ, operands); DONE; }) | |
3785 | |
3786 (define_expand "sne" | |
3787 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3788 "" | |
3789 { spu_emit_branch_or_set (1, NE, operands); DONE; }) | |
3790 | |
3791 (define_expand "sgt" | |
3792 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3793 "" | |
3794 { spu_emit_branch_or_set (1, GT, operands); DONE; }) | |
3795 | |
3796 (define_expand "slt" | |
3797 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3798 "" | |
3799 { spu_emit_branch_or_set (1, LT, operands); DONE; }) | |
3800 | |
3801 (define_expand "sge" | |
3802 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3803 "" | |
3804 { spu_emit_branch_or_set (1, GE, operands); DONE; }) | |
3805 | |
3806 (define_expand "sle" | |
3807 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3808 "" | |
3809 { spu_emit_branch_or_set (1, LE, operands); DONE; }) | |
3810 | |
3811 (define_expand "sgtu" | |
3812 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3813 "" | |
3814 { spu_emit_branch_or_set (1, GTU, operands); DONE; }) | |
3815 | |
3816 (define_expand "sltu" | |
3817 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3818 "" | |
3819 { spu_emit_branch_or_set (1, LTU, operands); DONE; }) | |
3820 | |
3821 (define_expand "sgeu" | |
3822 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3823 "" | |
3824 { spu_emit_branch_or_set (1, GEU, operands); DONE; }) | |
3825 | |
3826 (define_expand "sleu" | |
3827 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))] | |
3828 "" | |
3829 { spu_emit_branch_or_set (1, LEU, operands); DONE; }) | |
3830 | |
3831 | |
3832 ;; conditional move | |
3833 | |
3834 ;; Define this first one so HAVE_conditional_move is defined. | |
;; Placeholder conditional-move insn: modeless operands without
;; predicates, an insn condition of "!operands[0]" and an empty output
;; template.
3835 (define_insn "movcc_dummy" | |
3836 [(set (match_operand 0 "" "") | |
3837 (if_then_else (match_operand 1 "" "") | |
3838 (match_operand 2 "" "") | |
3839 (match_operand 3 "" "")))] | |
3840 "!operands[0]" | |
3841 "") | |
3842 | |
;; Conditional move for every mode of the ALL iterator.  Operand 1 is the
;; comparison; its rtx code is passed to spu_emit_branch_or_set together
;; with a first argument of 2, and that helper emits the whole sequence.
3843 (define_expand "mov<mode>cc" | |
3844 [(set (match_operand:ALL 0 "spu_reg_operand" "") | |
3845 (if_then_else:ALL (match_operand 1 "comparison_operator" "") | |
3846 (match_operand:ALL 2 "spu_reg_operand" "") | |
3847 (match_operand:ALL 3 "spu_reg_operand" "")))] | |
3848 "" | |
3849 { | |
3850 spu_emit_branch_or_set(2, GET_CODE(operands[1]), operands); | |
3851 DONE; | |
3852 }) | |
3853 | |
3854 ;; This pattern is used when the result of a compare is not large | |
3855 ;; enough to use in a selb when expanding conditional moves. | |
;; Modeless front end for UNSPEC_EXTEND_CMP.  The pattern carries no
;; modes, so the body builds the SET by hand and gives the unspec the
;; mode of operand 0 before emitting it.
3856 (define_expand "extend_compare" | |
3857 [(set (match_operand 0 "spu_reg_operand" "=r") | |
3858 (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))] | |
3859 "" | |
3860 { | |
3861 emit_insn (gen_rtx_SET (VOIDmode, operands[0], | |
3862 gen_rtx_UNSPEC (GET_MODE (operands[0]), | |
3863 gen_rtvec (1, operands[1]), | |
3864 UNSPEC_EXTEND_CMP))); | |
3865 DONE; | |
3866 }) | |
3867 | |
;; The insn behind extend_compare, one per mode of the ALL iterator.  The
;; source operand is modeless; output is a single fsm instruction.
3868 (define_insn "extend_compare<mode>" | |
3869 [(set (match_operand:ALL 0 "spu_reg_operand" "=r") | |
3870 (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))] | |
3871 "operands" | |
3872 "fsm\t%0,%1" | |
3873 [(set_attr "type" "shuf")]) | |
3874 | |
3875 | |
3876 ;; case | |
3877 | |
3878 ;; operand 0 is index | |
3879 ;; operand 1 is the minimum bound | |
3880 ;; operand 2 is the range of the table: maximum bound - minimum bound | |
3881 ;; operand 3 is CODE_LABEL for the table; | |
3882 ;; operand 4 is the CODE_LABEL to go to if index out of range. | |
;; Dispatch-table jump.  The body emits, in this order:
;;   table  = address of the table label (operand 3)
;;   index  = operand 0 - operand 1
;;   sindex = index << 2
;;   addr   = SImode load from table + sindex
;;   addr  += table                     (only when flag_pic)
;;   unsigned compare: jump to operand 4 if index > operand 2
;;   tablejump through addr
;; Note that the table load is emitted before the range check.
3883 (define_expand "casesi" | |
3884 [(match_operand:SI 0 "spu_reg_operand" "") | |
3885 (match_operand:SI 1 "immediate_operand" "") | |
3886 (match_operand:SI 2 "immediate_operand" "") | |
3887 (match_operand 3 "" "") | |
3888 (match_operand 4 "" "")] | |
3889 "" | |
3890 { | |
3891 rtx table = gen_reg_rtx (SImode); | |
3892 rtx index = gen_reg_rtx (SImode); | |
3893 rtx sindex = gen_reg_rtx (SImode); | |
3894 rtx addr = gen_reg_rtx (Pmode); | |
3895 | |
3896 emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3])); | |
3897 | |
3898 emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1]))); | |
3899 emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2))); | |
3900 emit_move_insn (addr, gen_rtx_MEM (SImode, | |
3901 gen_rtx_PLUS (SImode, table, sindex))); | |
3902 if (flag_pic) | |
3903 emit_insn (gen_addsi3 (addr, addr, table)); | |
3904 | |
3905 emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]); | |
3906 emit_jump_insn (gen_tablejump (addr, operands[3])); | |
3907 DONE; | |
3908 }) | |
3909 | |
;; Indirect jump through the SImode register in operand 0 (bi).  The
;; table label, operand 1, appears only in a (use) and is not printed.
3910 (define_insn "tablejump" | |
3911 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r")) | |
3912 (use (label_ref (match_operand 1 "" "")))] | |
3913 "" | |
3914 "bi\t%0" | |
3915 [(set_attr "type" "br")]) | |
3916 | |
3917 | |
3918 ;; call | |
3919 | |
3920 ;; Note that operand 1 is total size of args, in bytes, | |
3921 ;; and what the call insn wants is the number of words. | |
;; Sibling call without a return value.  If operand 0 does not satisfy
;; call_operand, its address is first copied into a Pmode register; the
;; call pattern (with a use of register 0) is then emitted as written.
3922 (define_expand "sibcall" | |
3923 [(parallel | |
3924 [(call (match_operand:QI 0 "call_operand" "") | |
3925 (match_operand:QI 1 "" "")) | |
3926 (use (reg:SI 0))])] | |
3927 "" | |
3928 { | |
3929 if (! call_operand (operands[0], QImode)) | |
3930 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); | |
3931 }) | |
3932 | |
;; Insn matched for sibling calls only (SIBLING_CALL_P (insn)).
;; Alternative "R" is output as bi with the %i modifier, alternative "S"
;; as br.
3933 (define_insn "_sibcall" | |
3934 [(parallel | |
3935 [(call (match_operand:QI 0 "call_operand" "R,S") | |
3936 (match_operand:QI 1 "" "i,i")) | |
3937 (use (reg:SI 0))])] | |
3938 "SIBLING_CALL_P(insn)" | |
3939 "@ | |
3940 bi\t%i0 | |
3941 br\t%0" | |
3942 [(set_attr "type" "br,br")]) | |
3943 | |
;; Sibling call that sets a return value (operand 0).  The callee is
;; operand 1; if it does not satisfy call_operand, its address is first
;; copied into a Pmode register.
3944 (define_expand "sibcall_value" | |
3945 [(parallel | |
3946 [(set (match_operand 0 "" "") | |
3947 (call (match_operand:QI 1 "call_operand" "") | |
3948 (match_operand:QI 2 "" ""))) | |
3949 (use (reg:SI 0))])] | |
3950 "" | |
3951 { | |
3952 if (! call_operand (operands[1], QImode)) | |
3953 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); | |
3954 }) | |
3955 | |
;; Value-returning form of _sibcall; matched only when
;; SIBLING_CALL_P (insn).  Alternative "R" is output as bi with the %i
;; modifier, alternative "S" as br.
3956 (define_insn "_sibcall_value" | |
3957 [(parallel | |
3958 [(set (match_operand 0 "" "") | |
3959 (call (match_operand:QI 1 "call_operand" "R,S") | |
3960 (match_operand:QI 2 "" "i,i"))) | |
3961 (use (reg:SI 0))])] | |
3962 "SIBLING_CALL_P(insn)" | |
3963 "@ | |
3964 bi\t%i1 | |
3965 br\t%1" | |
3966 [(set_attr "type" "br,br")]) | |
3967 | |
3968 ;; Note that operand 1 is total size of args, in bytes, | |
3969 ;; and what the call insn wants is the number of words. | |
;; Ordinary call without a return value.  The pattern clobbers registers
;; 0 and 130.  If operand 0 does not satisfy call_operand, its address is
;; first copied into a Pmode register.
3970 (define_expand "call" | |
3971 [(parallel | |
3972 [(call (match_operand:QI 0 "call_operand" "") | |
3973 (match_operand:QI 1 "" "")) | |
3974 (clobber (reg:SI 0)) | |
3975 (clobber (reg:SI 130))])] | |
3976 "" | |
3977 { | |
3978 if (! call_operand (operands[0], QImode)) | |
3979 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); | |
3980 }) | |
3981 | |
;; Call insn.  The callee constraint picks the instruction: "R" gives
;; bisl (with the %i modifier), "S" gives brsl and "T" gives brasl.  All
;; three write the return address to $lr.
3982 (define_insn "_call" | |
3983 [(parallel | |
3984 [(call (match_operand:QI 0 "call_operand" "R,S,T") | |
3985 (match_operand:QI 1 "" "i,i,i")) | |
3986 (clobber (reg:SI 0)) | |
3987 (clobber (reg:SI 130))])] | |
3988 "" | |
3989 "@ | |
3990 bisl\t$lr,%i0 | |
3991 brsl\t$lr,%0 | |
3992 brasl\t$lr,%0" | |
3993 [(set_attr "type" "br")]) | |
3994 | |
;; Ordinary call that sets a return value (operand 0).  The callee is
;; operand 1; if it does not satisfy call_operand, its address is first
;; copied into a Pmode register.  Clobbers registers 0 and 130.
3995 (define_expand "call_value" | |
3996 [(parallel | |
3997 [(set (match_operand 0 "" "") | |
3998 (call (match_operand:QI 1 "call_operand" "") | |
3999 (match_operand:QI 2 "" ""))) | |
4000 (clobber (reg:SI 0)) | |
4001 (clobber (reg:SI 130))])] | |
4002 "" | |
4003 { | |
4004 if (! call_operand (operands[1], QImode)) | |
4005 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); | |
4006 }) | |
4007 | |
;; Value-returning form of _call.  The callee constraint picks the
;; instruction: "R" gives bisl (with the %i modifier), "S" gives brsl and
;; "T" gives brasl.
4008 (define_insn "_call_value" | |
4009 [(parallel | |
4010 [(set (match_operand 0 "" "") | |
4011 (call (match_operand:QI 1 "call_operand" "R,S,T") | |
4012 (match_operand:QI 2 "" "i,i,i"))) | |
4013 (clobber (reg:SI 0)) | |
4014 (clobber (reg:SI 130))])] | |
4015 "" | |
4016 "@ | |
4017 bisl\t$lr,%i1 | |
4018 brsl\t$lr,%1 | |
4019 brasl\t$lr,%1" | |
4020 [(set_attr "type" "br")]) | |
4021 | |
;; Call a function whose return type is unknown.  Operand 0 is the
;; callee and operand 2 a vector of SETs.  The body emits a call_value
;; whose value is TImode hard register 3, then performs every SET found
;; in operand 2, and finally emits a blockage.  Operand 1 is not used by
;; the body.
4022 (define_expand "untyped_call" | |
4023 [(parallel [(call (match_operand 0 "" "") | |
4024 (const_int 0)) | |
4025 (match_operand 1 "" "") | |
4026 (match_operand 2 "" "")])] | |
4027 "" | |
4028 { | |
4029 int i; | |
4030 rtx reg = gen_rtx_REG (TImode, 3); | |
4031 | |
4032 /* We need to use call_value so the return value registers don't get | |
4033 * clobbered. */ | |
4034 emit_call_insn (gen_call_value (reg, operands[0], const0_rtx)); | |
4035 | |
4036 for (i = 0; i < XVECLEN (operands[2], 0); i++) | |
4037 { | |
4038 rtx set = XVECEXP (operands[2], 0, i); | |
4039 emit_move_insn (SET_DEST (set), SET_SRC (set)); | |
4040 } | |
4041 | |
4042 /* The optimizer does not know that the call sets the function value | |
4043 registers we stored in the result block. We avoid problems by | |
4044 claiming that all hard registers are used and clobbered at this | |
4045 point. */ | |
4046 emit_insn (gen_blockage ()); | |
4047 | |
4048 DONE; | |
4049 }) | |
4050 | |
4051 | |
4052 ;; Patterns used for splitting and combining. | |
4053 | |
4054 | |
4055 ;; Function prologue and epilogue. | |
4056 | |
;; Function prologue: everything is generated by spu_expand_prologue; the
;; (const_int 1) template is never emitted because the body ends in DONE.
4057 (define_expand "prologue" | |
4058 [(const_int 1)] | |
4059 "" | |
4060 { spu_expand_prologue (); DONE; }) | |
4061 | |
4062 ;; "blockage" is only emitted in epilogue. This is what it took to | |
4063 ;; make "basic block reordering" work with the insn sequence | |
4064 ;; generated by the spu_expand_epilogue (taken from mips.md) | |
4065 | |
;; An unspec_volatile insn with an empty output template and a length of
;; 0: it produces no code.
4066 (define_insn "blockage" | |
4067 [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] | |
4068 "" | |
4069 "" | |
4070 [(set_attr "type" "convert") | |
4071 (set_attr "length" "0")]) | |
4072 | |
;; Function epilogues.  Both expanders delegate to spu_expand_epilogue:
;; the normal epilogue passes false and the sibling-call epilogue passes
;; true.
4073 (define_expand "epilogue" | |
4074 [(const_int 2)] | |
4075 "" | |
4076 { spu_expand_epilogue (false); DONE; }) | |
4077 | |
4078 (define_expand "sibcall_epilogue" | |
4079 [(const_int 2)] | |
4080 "" | |
4081 { spu_expand_epilogue (true); DONE; }) | |
4082 | |
4083 | |
4084 ;; stack manipulations | |
4085 | |
4086 ;; An insn to allocate new stack space for dynamic use (e.g., alloca). | |
4087 ;; We move the back-chain and decrement the stack pointer. | |
;; Operand 0 receives the result and operand 1 is the amount to
;; allocate.  The RTL template only describes the effect; the actual
;; insns come from spu_allocate_stack, after which the body is DONE.
4088 (define_expand "allocate_stack" | |
4089 [(set (match_operand 0 "spu_reg_operand" "") | |
4090 (minus (reg 1) (match_operand 1 "spu_nonmem_operand" ""))) | |
4091 (set (reg 1) | |
4092 (minus (reg 1) (match_dup 1)))] | |
4093 "" | |
4094 "spu_allocate_stack (operands[0], operands[1]); DONE;") | |
4095 | |
4096 ;; These patterns say how to save and restore the stack pointer. We need not | |
4097 ;; save the stack pointer at function level since we are careful to preserve | |
4098 ;; the backchain. | |
4099 ;; | |
4100 | |
4101 ;; At block level the stack pointer is saved and restored, so that the | |
4102 ;; stack space allocated within a block is deallocated when leaving | |
4103 ;; block scope. By default, according to the SPU ABI, the stack | |
4104 ;; pointer and available stack size are saved in a register. Upon | |
4105 ;; restoration, the stack pointer is simply copied back, and the | |
4106 ;; current available stack size is calculated against the restored | |
4107 ;; stack pointer. | |
4108 ;; | |
4109 ;; For nonlocal gotos, we must save the stack pointer and its | |
4110 ;; backchain and restore both. Note that in the nonlocal case, the | |
4111 ;; save area is a memory location. | |
4112 | |
;; Function-level save and restore of the stack pointer are both no-ops:
;; each body is just DONE, so no insns are emitted.
4113 (define_expand "save_stack_function" | |
4114 [(match_operand 0 "general_operand" "") | |
4115 (match_operand 1 "general_operand" "")] | |
4116 "" | |
4117 "DONE;") | |
4118 | |
4119 (define_expand "restore_stack_function" | |
4120 [(match_operand 0 "general_operand" "") | |
4121 (match_operand 1 "general_operand" "")] | |
4122 "" | |
4123 "DONE;") | |
4124 | |
;; Block-level stack restore.  Operand 0 is a register and operand 1 a
;; memory operand; the work is done by spu_restore_stack_block.
4125 (define_expand "restore_stack_block" | |
4126 [(match_operand 0 "spu_reg_operand" "") | |
4127 (match_operand 1 "memory_operand" "")] | |
4128 "" | |
4129 " | |
4130 { | |
4131 spu_restore_stack_block (operands[0], operands[1]); | |
4132 DONE; | |
4133 }") | |
4134 | |
;; Save area layout written into memory operand 0 (two SImode words):
;;   offset 0: the word loaded from the address held in operand 1
;;   offset 4: operand 1 itself
4135 (define_expand "save_stack_nonlocal" | |
4136 [(match_operand 0 "memory_operand" "") | |
4137 (match_operand 1 "spu_reg_operand" "")] | |
4138 "" | |
4139 " | |
4140 { | |
4141 rtx temp = gen_reg_rtx (Pmode); | |
4142 | |
4143 /* Copy the backchain to the first word, sp to the second. We need to | |
4144 save the back chain because __builtin_apply appears to clobber it. */ | |
4145 emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1])); | |
4146 emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp); | |
4147 emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]); | |
4148 DONE; | |
4149 }") | |
4150 | |
;; Nonlocal-goto stack restore.  Operand 0 is a register and operand 1
;; the memory save area; the work is done by spu_restore_stack_nonlocal.
4151 (define_expand "restore_stack_nonlocal" | |
4152 [(match_operand 0 "spu_reg_operand" "") | |
4153 (match_operand 1 "memory_operand" "")] | |
4154 "" | |
4155 " | |
4156 { | |
4157 spu_restore_stack_nonlocal(operands[0], operands[1]); | |
4158 DONE; | |
4159 }") | |
4160 | |
4161 | |
4162 ;; vector patterns | |
4163 | |
4164 ;; Vector initialization | |
;; Initialise the vector register in operand 0 from operand 1 for every
;; mode of the V iterator; delegated entirely to spu_expand_vector_init.
4165 (define_expand "vec_init<mode>" | |
4166 [(match_operand:V 0 "register_operand" "") | |
4167 (match_operand 1 "" "")] | |
4168 "" | |
4169 { | |
4170 spu_expand_vector_init (operands[0], operands[1]); | |
4171 DONE; | |
4172 }) | |
4173 | |
;; Insert scalar operand 1 into vector operand 0 at element index
;; operand 2.  The body fills in the match_dup operands and lets the
;; template be emitted:
;;   operand 3 = fresh TImode register receiving the UNSPEC_CPAT result
;;   operand 4 = stack_pointer_rtx
;;   operand 5 = byte offset, i.e. index * size of the <inner> mode
;;   operand 6 = size of the <inner> mode
;; The second SET then combines operand 1, the old value of operand 0 and
;; operand 3 through UNSPEC_SHUFB.
;; NOTE(review): INTVAL is applied to operand 2 although its predicate is
;; spu_nonmem_operand; this assumes callers always pass a CONST_INT.
4174 (define_expand "vec_set<mode>" | |
4175 [(use (match_operand:SI 2 "spu_nonmem_operand" "")) | |
4176 (set (match_dup:TI 3) | |
4177 (unspec:TI [(match_dup:SI 4) | |
4178 (match_dup:SI 5) | |
4179 (match_dup:SI 6)] UNSPEC_CPAT)) | |
4180 (set (match_operand:V 0 "spu_reg_operand" "") | |
4181 (unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "") | |
4182 (match_dup:V 0) | |
4183 (match_dup:TI 3)] UNSPEC_SHUFB))] | |
4184 "" | |
4185 { | |
4186 HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode); | |
4187 rtx offset = GEN_INT (INTVAL (operands[2]) * size); | |
4188 operands[3] = gen_reg_rtx (TImode); | |
4189 operands[4] = stack_pointer_rtx; | |
4190 operands[5] = offset; | |
4191 operands[6] = GEN_INT (size); | |
4192 }) | |
4193 | |
;; Extract element operand 2 of vector operand 1 into operand 0.  When
;; index * <vmult> + <voff> is a multiple of 16 a plain spu_convert is
;; emitted and the expander is DONE; otherwise the body falls through and
;; the vec_select template is emitted as written.
4194 (define_expand "vec_extract<mode>" | |
4195 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r") | |
4196 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r") | |
4197 (parallel [(match_operand 2 "const_int_operand" "i")])))] | |
4198 "" | |
4199 { | |
4200 if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0) | |
4201 { | |
4202 emit_insn (gen_spu_convert (operands[0], operands[1])); | |
4203 DONE; | |
4204 } | |
4205 }) | |
4206 | |
;; Insn form of the element extract: a single rotqbyi whose immediate is
;; written as the expression (index*<vmult>+<voff>)%16 in the assembly
;; output ("%%" prints a literal percent sign).
4207 (define_insn "_vec_extract<mode>" | |
4208 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r") | |
4209 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r") | |
4210 (parallel [(match_operand 2 "const_int_operand" "i")])))] | |
4211 "" | |
4212 "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16" | |
4213 [(set_attr "type" "shuf")]) | |
4214 | |
;; Zero-extending extract of element 0 of a V8HI vector into an SImode
;; register, done with one rotqmbyi by -2.
4215 (define_insn "_vec_extractv8hi_ze" | |
4216 [(set (match_operand:SI 0 "spu_reg_operand" "=r") | |
4217 (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4218 (parallel [(const_int 0)]))))] | |
4219 "" | |
4220 "rotqmbyi\t%0,%1,-2" | |
4221 [(set_attr "type" "shuf")]) | |
4222 | |
4223 | |
4224 ;; misc | |
4225 | |
;; Modeless front end for the shufb insn.  Operands 0-2 may be of any
;; mode and operand 3 is the TImode control.  The body generates _shufb
;; and then sets the mode of the unspec to the mode of operand 0 before
;; emitting it.
4226 (define_expand "shufb" | |
4227 [(set (match_operand 0 "spu_reg_operand" "") | |
4228 (unspec [(match_operand 1 "spu_reg_operand" "") | |
4229 (match_operand 2 "spu_reg_operand" "") | |
4230 (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))] | |
4231 "" | |
4232 { | |
4233 rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]); | |
4234 PUT_MODE (SET_SRC (s), GET_MODE (operands[0])); | |
4235 emit_insn (s); | |
4236 DONE; | |
4237 }) | |
4238 | |
;; The shufb instruction: two modeless register sources and a TImode
;; control register.
4239 (define_insn "_shufb" | |
4240 [(set (match_operand 0 "spu_reg_operand" "=r") | |
4241 (unspec [(match_operand 1 "spu_reg_operand" "r") | |
4242 (match_operand 2 "spu_reg_operand" "r") | |
4243 (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))] | |
4244 "operands" | |
4245 "shufb\t%0,%1,%2,%3" | |
4246 [(set_attr "type" "shuf")]) | |
4247 | |
;; No-op insns, all unspec_volatile: "nop" and "nopn" (nop with an
;; immediate operand, constraint "K") have type nop; "lnop" has type lnop.
4248 (define_insn "nop" | |
4249 [(unspec_volatile [(const_int 0)] UNSPEC_NOP)] | |
4250 "" | |
4251 "nop" | |
4252 [(set_attr "type" "nop")]) | |
4253 | |
4254 (define_insn "nopn" | |
4255 [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPEC_NOP)] | |
4256 "" | |
4257 "nop\t%0" | |
4258 [(set_attr "type" "nop")]) | |
4259 | |
4260 (define_insn "lnop" | |
4261 [(unspec_volatile [(const_int 0)] UNSPEC_LNOP)] | |
4262 "" | |
4263 "lnop" | |
4264 [(set_attr "type" "lnop")]) | |
4265 | |
4266 ;; The operand is so we know why we generated this hbrp. | |
4267 ;; We clobber mem to make sure it isn't moved over any | |
4268 ;; loads, stores or calls while scheduling. | |
;; Emits hbrp.  Operand 0 is a constant integer that is only printed
;; after "#" in the output template.
4269 (define_insn "iprefetch" | |
4270 [(unspec [(match_operand:SI 0 "const_int_operand" "n")] UNSPEC_IPREFETCH) | |
4271 (clobber (mem:BLK (scratch)))] | |
4272 "" | |
4273 "hbrp\t# %0" | |
4274 [(set_attr "type" "iprefetch")]) | |
4275 | |
4276 ;; A non-volatile version so it gets scheduled | |
;; nop with a register operand, expressed as a plain unspec rather than
;; an unspec_volatile.
4277 (define_insn "nopn_nv" | |
4278 [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_NOP)] | |
4279 "" | |
4280 "nop\t%0" | |
4281 [(set_attr "type" "nop")]) | |
4282 | |
;; Branch hint.  Modelled as setting register 130 from an UNSPEC_HBR of
;; operands 0 and 1.  The constraint matched by operand 1 selects the
;; instruction: "r" gives hbr, "s" gives hbrr and "i" gives hbra.
4283 (define_insn "hbr" | |
4284 [(set (reg:SI 130) | |
4285 (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i") | |
4286 (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR)) | |
4287 (unspec [(const_int 0)] UNSPEC_HBR)] | |
4288 "" | |
4289 "@ | |
4290 hbr\t%0,%1 | |
4291 hbrr\t%0,%1 | |
4292 hbra\t%0,%1" | |
4293 [(set_attr "type" "hbr")]) | |
4294 | |
;; The sync, syncc and dsync instructions.  All three are unspec_volatile
;; UNSPEC_SYNC insns, told apart by the constant 0, 1 or 2, and each one
;; clobbers (mem:BLK (scratch)).
4295 (define_insn "sync" | |
4296 [(unspec_volatile [(const_int 0)] UNSPEC_SYNC) | |
4297 (clobber (mem:BLK (scratch)))] | |
4298 "" | |
4299 "sync" | |
4300 [(set_attr "type" "br")]) | |
4301 | |
4302 (define_insn "syncc" | |
4303 [(unspec_volatile [(const_int 1)] UNSPEC_SYNC) | |
4304 (clobber (mem:BLK (scratch)))] | |
4305 "" | |
4306 "syncc" | |
4307 [(set_attr "type" "br")]) | |
4308 | |
4309 (define_insn "dsync" | |
4310 [(unspec_volatile [(const_int 2)] UNSPEC_SYNC) | |
4311 (clobber (mem:BLK (scratch)))] | |
4312 "" | |
4313 "dsync" | |
4314 [(set_attr "type" "br")]) | |
4315 | |
4316 | |
4317 | |
4318 ;; Define the subtract-one-and-jump insns so loop.c | |
4319 ;; knows what to generate. | |
;; Only expands when optimize > 0 and flag_modulo_sched is set; in every
;; other case it FAILs.  It also FAILs when the loop level (operand 3) is
;; greater than 1 or when the counter (operand 0) is not SImode.
;; On success it emits "counter = counter + -1" followed by a jump to the
;; label in operand 4 taken when the counter is not zero.
4320 (define_expand "doloop_end" | |
4321 [(use (match_operand 0 "" "")) ; loop pseudo | |
4322 (use (match_operand 1 "" "")) ; iterations; zero if unknown | |
4323 (use (match_operand 2 "" "")) ; max iterations | |
4324 (use (match_operand 3 "" "")) ; loop level | |
4325 (use (match_operand 4 "" ""))] ; label | |
4326 "" | |
4327 " | |
4328 { | |
4329 /* Currently SMS relies on the do-loop pattern to recognize loops | |
4330 where (1) the control part comprises of all insns defining and/or | |
4331 using a certain 'count' register and (2) the loop count can be | |
4332 adjusted by modifying this register prior to the loop. | |
4333 . ??? The possible introduction of a new block to initialize the | |
4334 new IV can potentially effects branch optimizations. */ | |
4335 if (optimize > 0 && flag_modulo_sched) | |
4336 { | |
4337 rtx s0; | |
4338 rtx bcomp; | |
4339 rtx loc_ref; | |
4340 | |
4341 /* Only use this on innermost loops. */ | |
4342 if (INTVAL (operands[3]) > 1) | |
4343 FAIL; | |
4344 if (GET_MODE (operands[0]) != SImode) | |
4345 FAIL; | |
4346 | |
4347 s0 = operands [0]; | |
4348 emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1))); | |
4349 bcomp = gen_rtx_NE(SImode, s0, const0_rtx); | |
4350 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); | |
4351 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, | |
4352 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, | |
4353 loc_ref, pc_rtx))); | |
4354 | |
4355 DONE; | |
4356 }else | |
4357 FAIL; | |
4358 }") | |
4359 | |
4360 ;; convert between any two modes, avoiding any GCC assumptions | |
;; Both operands are mode-less spu_reg_operands.  The expander builds the
;; insn with gen__spu_convert, then uses PUT_MODE to give the unspec
;; source the mode of operand 0 before emitting it.
4361 (define_expand "spu_convert" | |
4362 [(set (match_operand 0 "spu_reg_operand" "") | |
4363 (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))] | |
4364 "" | |
4365 { | |
4366 rtx c = gen__spu_convert (operands[0], operands[1]); | |
4367 PUT_MODE (SET_SRC (c), GET_MODE (operands[0])); | |
4368 emit_insn (c); | |
4369 DONE; | |
4370 }) | |
4371 | |
;; Insn form of the UNSPEC_CONVERT set.  Operand 1 is tied to operand 0 by
;; the "0" constraint, the output template is empty and the length
;; attribute is 0; the insn has type "convert".
;; NOTE(review): the insn condition is the bare C expression "operands".
;; Presumably this is deliberate (it is always true at run time) --
;; confirm the reason before changing it.
4372 (define_insn "_spu_convert" | |
4373 [(set (match_operand 0 "spu_reg_operand" "=r") | |
4374 (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))] | |
4375 "operands" | |
4376 "" | |
4377 [(set_attr "type" "convert") | |
4378 (set_attr "length" "0")]) | |
4379 | |
;; Peephole that matches an UNSPEC_CONVERT set between two
;; spu_reg_operands (unconditionally: the condition string is empty) and
;; replaces it with (use (const_int 0)).
4380 (define_peephole2 | |
4381 [(set (match_operand 0 "spu_reg_operand") | |
4382 (unspec [(match_operand 1 "spu_reg_operand")] UNSPEC_CONVERT))] | |
4383 "" | |
4384 [(use (const_int 0))] | |
4385 "") | |
4386 | |
4387 | |
4388 ;; | |
4389 (include "spu-builtins.md") | |
4390 | |
4391 | |
;; V4SF signed maximum.  Builds a V4SI mask with gen_cgt_v4sf (operand 1
;; compared against operand 2) and passes it to gen_selb with operand 2 as
;; the first source and operand 1 as the second.
;; NOTE(review): presumably selb takes the second source where the mask
;; is set, i.e. operand 1 where operand 1 > operand 2 -- confirm against
;; the selb pattern.
4392 (define_expand "smaxv4sf3" | |
4393 [(set (match_operand:V4SF 0 "register_operand" "=r") | |
4394 (smax:V4SF (match_operand:V4SF 1 "register_operand" "r") | |
4395 (match_operand:V4SF 2 "register_operand" "r")))] | |
4396 "" | |
4397 " | |
4398 { | |
4399 rtx mask = gen_reg_rtx (V4SImode); | |
4400 | |
4401 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2])); | |
4402 emit_insn (gen_selb (operands[0], operands[2], operands[1], mask)); | |
4403 DONE; | |
4404 }") | |
4405 | |
;; V4SF signed minimum.  Builds a V4SI mask with gen_cgt_v4sf (operand 1
;; compared against operand 2) and passes it to gen_selb with operand 1 as
;; the first source and operand 2 as the second.
;; NOTE(review): presumably selb takes the second source where the mask
;; is set, i.e. operand 2 where operand 1 > operand 2 -- confirm against
;; the selb pattern.
4406 (define_expand "sminv4sf3" | |
4407 [(set (match_operand:V4SF 0 "register_operand" "=r") | |
4408 (smin:V4SF (match_operand:V4SF 1 "register_operand" "r") | |
4409 (match_operand:V4SF 2 "register_operand" "r")))] | |
4410 "" | |
4411 " | |
4412 { | |
4413 rtx mask = gen_reg_rtx (V4SImode); | |
4414 | |
4415 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2])); | |
4416 emit_insn (gen_selb (operands[0], operands[1], operands[2], mask)); | |
4417 DONE; | |
4418 }") | |
4419 | |
;; V2DF signed maximum.  Builds a V2DI mask with gen_cgt_v2df (operand 1
;; compared against operand 2), views it as V4SI through spu_gen_subreg
;; and passes it to gen_selb with operand 2 as the first source and
;; operand 1 as the second.
;; NOTE(review): presumably selb takes the second source where the mask
;; is set -- confirm against the selb pattern.
4420 (define_expand "smaxv2df3" | |
4421 [(set (match_operand:V2DF 0 "register_operand" "=r") | |
4422 (smax:V2DF (match_operand:V2DF 1 "register_operand" "r") | |
4423 (match_operand:V2DF 2 "register_operand" "r")))] | |
4424 "" | |
4425 " | |
4426 { | |
4427 rtx mask = gen_reg_rtx (V2DImode); | |
4428 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); | |
4429 emit_insn (gen_selb (operands[0], operands[2], operands[1], | |
4430 spu_gen_subreg (V4SImode, mask))); | |
4431 DONE; | |
4432 }") | |
4433 | |
;; V2DF signed minimum.  Builds a V2DI mask with gen_cgt_v2df (operand 1
;; compared against operand 2), views it as V4SI through spu_gen_subreg
;; and passes it to gen_selb with operand 1 as the first source and
;; operand 2 as the second.
;; NOTE(review): presumably selb takes the second source where the mask
;; is set -- confirm against the selb pattern.
4434 (define_expand "sminv2df3" | |
4435 [(set (match_operand:V2DF 0 "register_operand" "=r") | |
4436 (smin:V2DF (match_operand:V2DF 1 "register_operand" "r") | |
4437 (match_operand:V2DF 2 "register_operand" "r")))] | |
4438 "" | |
4439 " | |
4440 { | |
4441 rtx mask = gen_reg_rtx (V2DImode); | |
4442 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); | |
4443 emit_insn (gen_selb (operands[0], operands[1], operands[2], | |
4444 spu_gen_subreg (V4SImode, mask))); | |
4445 DONE; | |
4446 }") | |
4447 | |
;; Widening unsigned multiply of V8HI elements 0-3 of operands 1 and 2,
;; producing V4SI.  Two V4SI partial results are formed with
;; gen_spu_mpyhhu (ve) and gen_spu_mpyu (vo) and then merged by gen_shufb
;; under a constant TImode mask.
;; NOTE(review): the mask reads as word 0 of ve, word 0 of vo, word 1 of
;; ve, word 1 of vo, assuming control bytes 0x1n address the second shufb
;; source -- confirm against the shufb pattern.
4448 (define_expand "vec_widen_umult_hi_v8hi" | |
4449 [(set (match_operand:V4SI 0 "register_operand" "=r") | |
4450 (mult:V4SI | |
4451 (zero_extend:V4SI | |
4452 (vec_select:V4HI | |
4453 (match_operand:V8HI 1 "register_operand" "r") | |
4454 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) | |
4455 (zero_extend:V4SI | |
4456 (vec_select:V4HI | |
4457 (match_operand:V8HI 2 "register_operand" "r") | |
4458 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))] | |
4459 "" | |
4460 " | |
4461 { | |
4462 rtx ve = gen_reg_rtx (V4SImode); | |
4463 rtx vo = gen_reg_rtx (V4SImode); | |
4464 rtx mask = gen_reg_rtx (TImode); | |
4465 unsigned char arr[16] = { | |
4466 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | |
4467 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17}; | |
4468 | |
4469 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4470 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2])); | |
4471 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2])); | |
4472 emit_insn (gen_shufb (operands[0], ve, vo, mask)); | |
4473 DONE; | |
4474 }") | |
4475 | |
;; Widening unsigned multiply of V8HI elements 4-7 of operands 1 and 2,
;; producing V4SI.  Two V4SI partial results are formed with
;; gen_spu_mpyhhu (ve) and gen_spu_mpyu (vo) and then merged by gen_shufb
;; under a constant TImode mask.
;; NOTE(review): the mask reads as word 2 of ve, word 2 of vo, word 3 of
;; ve, word 3 of vo, assuming control bytes 0x1n address the second shufb
;; source -- confirm against the shufb pattern.
4476 (define_expand "vec_widen_umult_lo_v8hi" | |
4477 [(set (match_operand:V4SI 0 "register_operand" "=r") | |
4478 (mult:V4SI | |
4479 (zero_extend:V4SI | |
4480 (vec_select:V4HI | |
4481 (match_operand:V8HI 1 "register_operand" "r") | |
4482 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) | |
4483 (zero_extend:V4SI | |
4484 (vec_select:V4HI | |
4485 (match_operand:V8HI 2 "register_operand" "r") | |
4486 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))] | |
4487 "" | |
4488 " | |
4489 { | |
4490 rtx ve = gen_reg_rtx (V4SImode); | |
4491 rtx vo = gen_reg_rtx (V4SImode); | |
4492 rtx mask = gen_reg_rtx (TImode); | |
4493 unsigned char arr[16] = { | |
4494 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, | |
4495 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F}; | |
4496 | |
4497 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4498 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2])); | |
4499 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2])); | |
4500 emit_insn (gen_shufb (operands[0], ve, vo, mask)); | |
4501 DONE; | |
4502 }") | |
4503 | |
;; Widening signed multiply of V8HI elements 0-3 of operands 1 and 2,
;; producing V4SI.  Two V4SI partial results are formed with
;; gen_spu_mpyhh (ve) and gen_spu_mpy (vo) and then merged by gen_shufb
;; under a constant TImode mask.
;; NOTE(review): the mask reads as word 0 of ve, word 0 of vo, word 1 of
;; ve, word 1 of vo, assuming control bytes 0x1n address the second shufb
;; source -- confirm against the shufb pattern.
4504 (define_expand "vec_widen_smult_hi_v8hi" | |
4505 [(set (match_operand:V4SI 0 "register_operand" "=r") | |
4506 (mult:V4SI | |
4507 (sign_extend:V4SI | |
4508 (vec_select:V4HI | |
4509 (match_operand:V8HI 1 "register_operand" "r") | |
4510 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) | |
4511 (sign_extend:V4SI | |
4512 (vec_select:V4HI | |
4513 (match_operand:V8HI 2 "register_operand" "r") | |
4514 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))] | |
4515 "" | |
4516 " | |
4517 { | |
4518 rtx ve = gen_reg_rtx (V4SImode); | |
4519 rtx vo = gen_reg_rtx (V4SImode); | |
4520 rtx mask = gen_reg_rtx (TImode); | |
4521 unsigned char arr[16] = { | |
4522 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | |
4523 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17}; | |
4524 | |
4525 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4526 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2])); | |
4527 emit_insn (gen_spu_mpy (vo, operands[1], operands[2])); | |
4528 emit_insn (gen_shufb (operands[0], ve, vo, mask)); | |
4529 DONE; | |
4530 }") | |
4531 | |
;; Widening signed multiply of V8HI elements 4-7 of operands 1 and 2,
;; producing V4SI.  Two V4SI partial results are formed with
;; gen_spu_mpyhh (ve) and gen_spu_mpy (vo) and then merged by gen_shufb
;; under a constant TImode mask.
;; NOTE(review): the mask reads as word 2 of ve, word 2 of vo, word 3 of
;; ve, word 3 of vo, assuming control bytes 0x1n address the second shufb
;; source -- confirm against the shufb pattern.
4532 (define_expand "vec_widen_smult_lo_v8hi" | |
4533 [(set (match_operand:V4SI 0 "register_operand" "=r") | |
4534 (mult:V4SI | |
4535 (sign_extend:V4SI | |
4536 (vec_select:V4HI | |
4537 (match_operand:V8HI 1 "register_operand" "r") | |
4538 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) | |
4539 (sign_extend:V4SI | |
4540 (vec_select:V4HI | |
4541 (match_operand:V8HI 2 "register_operand" "r") | |
4542 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))] | |
4543 "" | |
4544 " | |
4545 { | |
4546 rtx ve = gen_reg_rtx (V4SImode); | |
4547 rtx vo = gen_reg_rtx (V4SImode); | |
4548 rtx mask = gen_reg_rtx (TImode); | |
4549 unsigned char arr[16] = { | |
4550 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, | |
4551 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F}; | |
4552 | |
4553 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4554 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2])); | |
4555 emit_insn (gen_spu_mpy (vo, operands[1], operands[2])); | |
4556 emit_insn (gen_shufb (operands[0], ve, vo, mask)); | |
4557 DONE; | |
4558 }") | |
4559 | |
;; Realigning load for every mode of the ALL iterator.  Expands to a
;; single gen_shufb of operands 1 and 2 under the TImode control value in
;; operand 3, writing operand 0.
4560 (define_expand "vec_realign_load_<mode>" | |
4561 [(set (match_operand:ALL 0 "register_operand" "=r") | |
4562 (unspec:ALL [(match_operand:ALL 1 "register_operand" "r") | |
4563 (match_operand:ALL 2 "register_operand" "r") | |
4564 (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))] | |
4565 "" | |
4566 " | |
4567 { | |
4568 emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3])); | |
4569 DONE; | |
4570 }") | |
4571 | |
;; Computes a V16QI value from the address of memory operand 1
;; (UNSPEC_SPU_MASK_FOR_LOAD).  Steps performed by the C body:
;;   1. load the constant bytes 0x10..0x1F into "offset";
;;   2. force the address into a register and AND it with 0xF;
;;   3. shufb that value with an all-0x03 control word into a V8HI vector;
;;   4. subtract the result from "offset" with gen_subv8hi3;
;;   5. view the difference as V16QI and move it to operand 0.
;; NOTE(review): step 3 presumably replicates byte 3 of addr_bits into
;; every byte of the vector -- confirm against the shufb pattern.
4572 (define_expand "spu_lvsr" | |
4573 [(set (match_operand:V16QI 0 "register_operand" "") | |
4574 (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))] | |
4575 "" | |
4576 " | |
4577 { | |
4578 rtx addr; | |
4579 rtx offset = gen_reg_rtx (V8HImode); | |
4580 rtx addr_bits = gen_reg_rtx (SImode); | |
4581 rtx addr_bits_vec = gen_reg_rtx (V8HImode); | |
4582 rtx splatqi = gen_reg_rtx (TImode); | |
4583 rtx result = gen_reg_rtx (V8HImode); | |
4584 unsigned char arr[16] = { | |
4585 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
4586 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; | |
4587 unsigned char arr2[16] = { | |
4588 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, | |
4589 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03}; | |
4590 | |
4591 emit_move_insn (offset, array_to_constant (V8HImode, arr)); | |
4592 emit_move_insn (splatqi, array_to_constant (TImode, arr2)); | |
4593 | |
4594 gcc_assert (GET_CODE (operands[1]) == MEM); | |
4595 addr = force_reg (Pmode, XEXP (operands[1], 0)); | |
4596 emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF))); | |
4597 emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi)); | |
4598 | |
4599 /* offset - (addr & 0xF) | |
4600 It is safe to use a single sfh, because each byte of offset is > 15 and | |
4601 each byte of addr is <= 15. */ | |
4602 emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec)); | |
4603 | |
4604 result = simplify_gen_subreg (V16QImode, result, V8HImode, 0); | |
4605 emit_move_insn (operands[0], result); | |
4606 | |
4607 DONE; | |
4608 }") | |
4609 | |
;; Zero-extend V8HI elements 0-3 of operand 1 to V4SI.  Implemented as one
;; gen_shufb of operand 1 with itself under a constant TImode mask whose
;; words are 0x80 0x80 followed by the two byte indices of one halfword.
;; NOTE(review): assumes a 0x80 control byte yields a zero byte -- confirm
;; against the shufb pattern.
4610 (define_expand "vec_unpacku_hi_v8hi" | |
4611 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4612 (zero_extend:V4SI | |
4613 (vec_select:V4HI | |
4614 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4615 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] | |
4616 "" | |
4617 { | |
4618 rtx mask = gen_reg_rtx (TImode); | |
4619 unsigned char arr[16] = { | |
4620 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, | |
4621 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; | |
4622 | |
4623 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4624 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); | |
4625 | |
4626 DONE; | |
4627 }) | |
4628 | |
;; Zero-extend V8HI elements 4-7 of operand 1 to V4SI.  Implemented as one
;; gen_shufb of operand 1 with itself under a constant TImode mask whose
;; words are 0x80 0x80 followed by the two byte indices of one halfword
;; (bytes 0x08..0x0F).
;; NOTE(review): assumes a 0x80 control byte yields a zero byte -- confirm
;; against the shufb pattern.
4629 (define_expand "vec_unpacku_lo_v8hi" | |
4630 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4631 (zero_extend:V4SI | |
4632 (vec_select:V4HI | |
4633 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4634 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] | |
4635 "" | |
4636 { | |
4637 rtx mask = gen_reg_rtx (TImode); | |
4638 unsigned char arr[16] = { | |
4639 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, | |
4640 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; | |
4641 | |
4642 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4643 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); | |
4644 | |
4645 DONE; | |
4646 }) | |
4647 | |
;; Sign-extend V8HI elements 0-3 of operand 1 to V4SI.  A gen_shufb under
;; a constant mask first moves each selected halfword into the last two
;; bytes of a word of tmp1 (V8HI); gen_spu_xshw then produces the V4SI
;; result in tmp2, which is copied to operand 0.
;; NOTE(review): presumably xshw sign-extends the low halfword of each
;; word, overwriting the bytes selected by 0x80 -- confirm against the
;; spu_xshw pattern.
4648 (define_expand "vec_unpacks_hi_v8hi" | |
4649 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4650 (sign_extend:V4SI | |
4651 (vec_select:V4HI | |
4652 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4653 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] | |
4654 "" | |
4655 { | |
4656 rtx tmp1 = gen_reg_rtx (V8HImode); | |
4657 rtx tmp2 = gen_reg_rtx (V4SImode); | |
4658 rtx mask = gen_reg_rtx (TImode); | |
4659 unsigned char arr[16] = { | |
4660 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, | |
4661 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; | |
4662 | |
4663 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4664 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); | |
4665 emit_insn (gen_spu_xshw (tmp2, tmp1)); | |
4666 emit_move_insn (operands[0], tmp2); | |
4667 | |
4668 DONE; | |
4669 }) | |
4670 | |
;; Sign-extend V8HI elements 4-7 of operand 1 to V4SI.  A gen_shufb under
;; a constant mask first moves each selected halfword (bytes 0x08..0x0F)
;; into the last two bytes of a word of tmp1 (V8HI); gen_spu_xshw then
;; produces the V4SI result in tmp2, which is copied to operand 0.
;; NOTE(review): presumably xshw sign-extends the low halfword of each
;; word, overwriting the bytes selected by 0x80 -- confirm against the
;; spu_xshw pattern.
4671 (define_expand "vec_unpacks_lo_v8hi" | |
4672 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4673 (sign_extend:V4SI | |
4674 (vec_select:V4HI | |
4675 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4676 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] | |
4677 "" | |
4678 { | |
4679 rtx tmp1 = gen_reg_rtx (V8HImode); | |
4680 rtx tmp2 = gen_reg_rtx (V4SImode); | |
4681 rtx mask = gen_reg_rtx (TImode); | |
4682 unsigned char arr[16] = { | |
4683 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, | |
4684 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; | |
4685 | |
4686 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4687 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); | |
4688 emit_insn (gen_spu_xshw (tmp2, tmp1)); | |
4689 emit_move_insn (operands[0], tmp2); | |
4690 | |
4691 DONE; | |
4692 }) | |
4693 | |
;; Zero-extend V16QI elements 0-7 of operand 1 to V8HI.  Implemented as
;; one gen_shufb of operand 1 with itself under a constant TImode mask in
;; which every halfword is 0x80 followed by one source byte index.
;; NOTE(review): assumes a 0x80 control byte yields a zero byte -- confirm
;; against the shufb pattern.
4694 (define_expand "vec_unpacku_hi_v16qi" | |
4695 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4696 (zero_extend:V8HI | |
4697 (vec_select:V8QI | |
4698 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4699 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) | |
4700 (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] | |
4701 "" | |
4702 { | |
4703 rtx mask = gen_reg_rtx (TImode); | |
4704 unsigned char arr[16] = { | |
4705 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, | |
4706 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; | |
4707 | |
4708 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4709 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); | |
4710 | |
4711 DONE; | |
4712 }) | |
4713 | |
;; Zero-extend V16QI elements 8-15 of operand 1 to V8HI.  Implemented as
;; one gen_shufb of operand 1 with itself under a constant TImode mask in
;; which every halfword is 0x80 followed by one source byte index
;; (0x08..0x0F).
;; NOTE(review): assumes a 0x80 control byte yields a zero byte -- confirm
;; against the shufb pattern.
4714 (define_expand "vec_unpacku_lo_v16qi" | |
4715 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4716 (zero_extend:V8HI | |
4717 (vec_select:V8QI | |
4718 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4719 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) | |
4720 (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] | |
4721 "" | |
4722 { | |
4723 rtx mask = gen_reg_rtx (TImode); | |
4724 unsigned char arr[16] = { | |
4725 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, | |
4726 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; | |
4727 | |
4728 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4729 emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); | |
4730 | |
4731 DONE; | |
4732 }) | |
4733 | |
;; Sign-extend V16QI elements 0-7 of operand 1 to V8HI.  A gen_shufb under
;; a constant mask first places each selected byte in the second byte of
;; a halfword of tmp1 (V16QI); gen_spu_xsbh then produces the V8HI result
;; in tmp2, which is copied to operand 0.
;; NOTE(review): presumably xsbh sign-extends the low byte of each
;; halfword, overwriting the bytes selected by 0x80 -- confirm against
;; the spu_xsbh pattern.
4734 (define_expand "vec_unpacks_hi_v16qi" | |
4735 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4736 (sign_extend:V8HI | |
4737 (vec_select:V8QI | |
4738 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4739 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) | |
4740 (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] | |
4741 "" | |
4742 { | |
4743 rtx tmp1 = gen_reg_rtx (V16QImode); | |
4744 rtx tmp2 = gen_reg_rtx (V8HImode); | |
4745 rtx mask = gen_reg_rtx (TImode); | |
4746 unsigned char arr[16] = { | |
4747 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, | |
4748 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; | |
4749 | |
4750 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4751 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); | |
4752 emit_insn (gen_spu_xsbh (tmp2, tmp1)); | |
4753 emit_move_insn (operands[0], tmp2); | |
4754 | |
4755 DONE; | |
4756 }) | |
4757 | |
;; Sign-extend V16QI elements 8-15 of operand 1 to V8HI.  A gen_shufb
;; under a constant mask first places each selected byte (0x08..0x0F) in
;; the second byte of a halfword of tmp1 (V16QI); gen_spu_xsbh then
;; produces the V8HI result in tmp2, which is copied to operand 0.
;; NOTE(review): presumably xsbh sign-extends the low byte of each
;; halfword, overwriting the bytes selected by 0x80 -- confirm against
;; the spu_xsbh pattern.
4758 (define_expand "vec_unpacks_lo_v16qi" | |
4759 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4760 (sign_extend:V8HI | |
4761 (vec_select:V8QI | |
4762 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4763 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) | |
4764 (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] | |
4765 "" | |
4766 { | |
4767 rtx tmp1 = gen_reg_rtx (V16QImode); | |
4768 rtx tmp2 = gen_reg_rtx (V8HImode); | |
4769 rtx mask = gen_reg_rtx (TImode); | |
4770 unsigned char arr[16] = { | |
4771 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, | |
4772 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; | |
4773 | |
4774 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4775 emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); | |
4776 emit_insn (gen_spu_xsbh (tmp2, tmp1)); | |
4777 emit_move_insn (operands[0], tmp2); | |
4778 | |
4779 DONE; | |
4780 }) | |
4781 | |
4782 | |
;; Concatenate elements 0 and 2 of V4SI operand 1 with elements 0 and 2 of
;; V4SI operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x00-0x03, 0x08-0x0B, 0x10-0x13 and 0x18-0x1B.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4783 (define_expand "vec_extract_evenv4si" | |
4784 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4785 (vec_concat:V4SI | |
4786 (vec_select:V2SI | |
4787 (match_operand:V4SI 1 "spu_reg_operand" "r") | |
4788 (parallel [(const_int 0)(const_int 2)])) | |
4789 (vec_select:V2SI | |
4790 (match_operand:V4SI 2 "spu_reg_operand" "r") | |
4791 (parallel [(const_int 0)(const_int 2)]))))] | |
4792 | |
4793 "" | |
4794 " | |
4795 { | |
4796 rtx mask = gen_reg_rtx (TImode); | |
4797 unsigned char arr[16] = { | |
4798 0x00, 0x01, 0x02, 0x03, | |
4799 0x08, 0x09, 0x0A, 0x0B, | |
4800 0x10, 0x11, 0x12, 0x13, | |
4801 0x18, 0x19, 0x1A, 0x1B}; | |
4802 | |
4803 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4804 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4805 DONE; | |
4806 }") | |
4807 | |
4808 | |
;; Concatenate elements 0 and 2 of V4SF operand 1 with elements 0 and 2 of
;; V4SF operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x00-0x03, 0x08-0x0B, 0x10-0x13 and 0x18-0x1B.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4809 (define_expand "vec_extract_evenv4sf" | |
4810 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
4811 (vec_concat:V4SF | |
4812 (vec_select:V2SF | |
4813 (match_operand:V4SF 1 "spu_reg_operand" "r") | |
4814 (parallel [(const_int 0)(const_int 2)])) | |
4815 (vec_select:V2SF | |
4816 (match_operand:V4SF 2 "spu_reg_operand" "r") | |
4817 (parallel [(const_int 0)(const_int 2)]))))] | |
4818 | |
4819 "" | |
4820 " | |
4821 { | |
4822 rtx mask = gen_reg_rtx (TImode); | |
4823 unsigned char arr[16] = { | |
4824 0x00, 0x01, 0x02, 0x03, | |
4825 0x08, 0x09, 0x0A, 0x0B, | |
4826 0x10, 0x11, 0x12, 0x13, | |
4827 0x18, 0x19, 0x1A, 0x1B}; | |
4828 | |
4829 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4830 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4831 DONE; | |
4832 }") | |
4833 | |
;; Concatenate elements 0, 2, 4 and 6 of V8HI operand 1 with the same
;; elements of V8HI operand 2.  Implemented as one gen_shufb under a
;; constant TImode mask listing the byte pairs starting at 0x00, 0x04,
;; 0x08, 0x0C and then 0x10, 0x14, 0x18, 0x1C.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4834 (define_expand "vec_extract_evenv8hi" | |
4835 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4836 (vec_concat:V8HI | |
4837 (vec_select:V4HI | |
4838 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4839 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])) | |
4840 (vec_select:V4HI | |
4841 (match_operand:V8HI 2 "spu_reg_operand" "r") | |
4842 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))))] | |
4843 | |
4844 "" | |
4845 " | |
4846 { | |
4847 rtx mask = gen_reg_rtx (TImode); | |
4848 unsigned char arr[16] = { | |
4849 0x00, 0x01, 0x04, 0x05, | |
4850 0x08, 0x09, 0x0C, 0x0D, | |
4851 0x10, 0x11, 0x14, 0x15, | |
4852 0x18, 0x19, 0x1C, 0x1D}; | |
4853 | |
4854 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4855 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4856 DONE; | |
4857 }") | |
4858 | |
;; Concatenate the even-numbered elements (0, 2, ..., 14) of V16QI
;; operand 1 with the same elements of V16QI operand 2.  Implemented as
;; one gen_shufb under a constant TImode mask listing the even byte
;; indices 0x00..0x0E followed by 0x10..0x1E.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4859 (define_expand "vec_extract_evenv16qi" | |
4860 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
4861 (vec_concat:V16QI | |
4862 (vec_select:V8QI | |
4863 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4864 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6) | |
4865 (const_int 8)(const_int 10)(const_int 12)(const_int 14)])) | |
4866 (vec_select:V8QI | |
4867 (match_operand:V16QI 2 "spu_reg_operand" "r") | |
4868 (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6) | |
4869 (const_int 8)(const_int 10)(const_int 12)(const_int 14)]))))] | |
4870 | |
4871 "" | |
4872 " | |
4873 { | |
4874 rtx mask = gen_reg_rtx (TImode); | |
4875 unsigned char arr[16] = { | |
4876 0x00, 0x02, 0x04, 0x06, | |
4877 0x08, 0x0A, 0x0C, 0x0E, | |
4878 0x10, 0x12, 0x14, 0x16, | |
4879 0x18, 0x1A, 0x1C, 0x1E}; | |
4880 | |
4881 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4882 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4883 DONE; | |
4884 }") | |
4885 | |
;; Concatenate elements 1 and 3 of V4SI operand 1 with elements 1 and 3 of
;; V4SI operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x04-0x07, 0x0C-0x0F, 0x14-0x17 and 0x1C-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4886 (define_expand "vec_extract_oddv4si" | |
4887 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
4888 (vec_concat:V4SI | |
4889 (vec_select:V2SI | |
4890 (match_operand:V4SI 1 "spu_reg_operand" "r") | |
4891 (parallel [(const_int 1)(const_int 3)])) | |
4892 (vec_select:V2SI | |
4893 (match_operand:V4SI 2 "spu_reg_operand" "r") | |
4894 (parallel [(const_int 1)(const_int 3)]))))] | |
4895 | |
4896 "" | |
4897 " | |
4898 { | |
4899 rtx mask = gen_reg_rtx (TImode); | |
4900 unsigned char arr[16] = { | |
4901 0x04, 0x05, 0x06, 0x07, | |
4902 0x0C, 0x0D, 0x0E, 0x0F, | |
4903 0x14, 0x15, 0x16, 0x17, | |
4904 0x1C, 0x1D, 0x1E, 0x1F}; | |
4905 | |
4906 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4907 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4908 DONE; | |
4909 }") | |
4910 | |
;; Concatenate elements 1 and 3 of V4SF operand 1 with elements 1 and 3 of
;; V4SF operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x04-0x07, 0x0C-0x0F, 0x14-0x17 and 0x1C-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4911 (define_expand "vec_extract_oddv4sf" | |
4912 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
4913 (vec_concat:V4SF | |
4914 (vec_select:V2SF | |
4915 (match_operand:V4SF 1 "spu_reg_operand" "r") | |
4916 (parallel [(const_int 1)(const_int 3)])) | |
4917 (vec_select:V2SF | |
4918 (match_operand:V4SF 2 "spu_reg_operand" "r") | |
4919 (parallel [(const_int 1)(const_int 3)]))))] | |
4920 | |
4921 "" | |
4922 " | |
4923 { | |
4924 rtx mask = gen_reg_rtx (TImode); | |
4925 unsigned char arr[16] = { | |
4926 0x04, 0x05, 0x06, 0x07, | |
4927 0x0C, 0x0D, 0x0E, 0x0F, | |
4928 0x14, 0x15, 0x16, 0x17, | |
4929 0x1C, 0x1D, 0x1E, 0x1F}; | |
4930 | |
4931 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4932 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4933 DONE; | |
4934 }") | |
4935 | |
;; Concatenate elements 1, 3, 5 and 7 of V8HI operand 1 with the same
;; elements of V8HI operand 2.  Implemented as one gen_shufb under a
;; constant TImode mask listing the byte pairs starting at 0x02, 0x06,
;; 0x0A, 0x0E and then 0x12, 0x16, 0x1A, 0x1E.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4936 (define_expand "vec_extract_oddv8hi" | |
4937 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
4938 (vec_concat:V8HI | |
4939 (vec_select:V4HI | |
4940 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
4941 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])) | |
4942 (vec_select:V4HI | |
4943 (match_operand:V8HI 2 "spu_reg_operand" "r") | |
4944 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))] | |
4945 | |
4946 "" | |
4947 " | |
4948 { | |
4949 rtx mask = gen_reg_rtx (TImode); | |
4950 unsigned char arr[16] = { | |
4951 0x02, 0x03, 0x06, 0x07, | |
4952 0x0A, 0x0B, 0x0E, 0x0F, | |
4953 0x12, 0x13, 0x16, 0x17, | |
4954 0x1A, 0x1B, 0x1E, 0x1F}; | |
4955 | |
4956 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4957 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4958 DONE; | |
4959 }") | |
4960 | |
;; Concatenate the odd-numbered elements (1, 3, ..., 15) of V16QI
;; operand 1 with the same elements of V16QI operand 2.  Implemented as
;; one gen_shufb under a constant TImode mask listing the odd byte
;; indices 0x01..0x0F followed by 0x11..0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4961 (define_expand "vec_extract_oddv16qi" | |
4962 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
4963 (vec_concat:V16QI | |
4964 (vec_select:V8QI | |
4965 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
4966 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7) | |
4967 (const_int 9)(const_int 11)(const_int 13)(const_int 15)])) | |
4968 (vec_select:V8QI | |
4969 (match_operand:V16QI 2 "spu_reg_operand" "r") | |
4970 (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7) | |
4971 (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))))] | |
4972 | |
4973 "" | |
4974 " | |
4975 { | |
4976 rtx mask = gen_reg_rtx (TImode); | |
4977 unsigned char arr[16] = { | |
4978 0x01, 0x03, 0x05, 0x07, | |
4979 0x09, 0x0B, 0x0D, 0x0F, | |
4980 0x11, 0x13, 0x15, 0x17, | |
4981 0x19, 0x1B, 0x1D, 0x1F}; | |
4982 | |
4983 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
4984 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
4985 DONE; | |
4986 }") | |
4987 | |
;; Interleave elements 0 and 1 of V4SF operand 1 with elements 0 and 1 of
;; V4SF operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x00-0x03, 0x10-0x13, 0x04-0x07 and 0x14-0x17.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
4988 (define_expand "vec_interleave_highv4sf" | |
4989 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
4990 (vec_select:V4SF | |
4991 (vec_concat:V4SF | |
4992 (vec_select:V2SF | |
4993 (match_operand:V4SF 1 "spu_reg_operand" "r") | |
4994 (parallel [(const_int 0)(const_int 1)])) | |
4995 (vec_select:V2SF | |
4996 (match_operand:V4SF 2 "spu_reg_operand" "r") | |
4997 (parallel [(const_int 0)(const_int 1)]))) | |
4998 (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] | |
4999 | |
5000 "" | |
5001 " | |
5002 { | |
5003 rtx mask = gen_reg_rtx (TImode); | |
5004 unsigned char arr[16] = { | |
5005 0x00, 0x01, 0x02, 0x03, | |
5006 0x10, 0x11, 0x12, 0x13, | |
5007 0x04, 0x05, 0x06, 0x07, | |
5008 0x14, 0x15, 0x16, 0x17}; | |
5009 | |
5010 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5011 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5012 DONE; | |
5013 }") | |
5014 | |
;; Interleave elements 2 and 3 of V4SF operand 1 with elements 2 and 3 of
;; V4SF operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x08-0x0B, 0x18-0x1B, 0x0C-0x0F and 0x1C-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5015 (define_expand "vec_interleave_lowv4sf" | |
5016 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") | |
5017 (vec_select:V4SF | |
5018 (vec_concat:V4SF | |
5019 (vec_select:V2SF | |
5020 (match_operand:V4SF 1 "spu_reg_operand" "r") | |
5021 (parallel [(const_int 2)(const_int 3)])) | |
5022 (vec_select:V2SF | |
5023 (match_operand:V4SF 2 "spu_reg_operand" "r") | |
5024 (parallel [(const_int 2)(const_int 3)]))) | |
5025 (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] | |
5026 | |
5027 "" | |
5028 " | |
5029 { | |
5030 rtx mask = gen_reg_rtx (TImode); | |
5031 unsigned char arr[16] = { | |
5032 0x08, 0x09, 0x0A, 0x0B, | |
5033 0x18, 0x19, 0x1A, 0x1B, | |
5034 0x0C, 0x0D, 0x0E, 0x0F, | |
5035 0x1C, 0x1D, 0x1E, 0x1F}; | |
5036 | |
5037 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5038 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5039 DONE; | |
5040 }") | |
5041 | |
;; Interleave elements 0 and 1 of V4SI operand 1 with elements 0 and 1 of
;; V4SI operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x00-0x03, 0x10-0x13, 0x04-0x07 and 0x14-0x17.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5042 (define_expand "vec_interleave_highv4si" | |
5043 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
5044 (vec_select:V4SI | |
5045 (vec_concat:V4SI | |
5046 (vec_select:V2SI | |
5047 (match_operand:V4SI 1 "spu_reg_operand" "r") | |
5048 (parallel [(const_int 0)(const_int 1)])) | |
5049 (vec_select:V2SI | |
5050 (match_operand:V4SI 2 "spu_reg_operand" "r") | |
5051 (parallel [(const_int 0)(const_int 1)]))) | |
5052 (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] | |
5053 | |
5054 "" | |
5055 " | |
5056 { | |
5057 rtx mask = gen_reg_rtx (TImode); | |
5058 unsigned char arr[16] = { | |
5059 0x00, 0x01, 0x02, 0x03, | |
5060 0x10, 0x11, 0x12, 0x13, | |
5061 0x04, 0x05, 0x06, 0x07, | |
5062 0x14, 0x15, 0x16, 0x17}; | |
5063 | |
5064 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5065 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5066 DONE; | |
5067 }") | |
5068 | |
;; Interleave elements 2 and 3 of V4SI operand 1 with elements 2 and 3 of
;; V4SI operand 2.  Implemented as one gen_shufb under a constant TImode
;; mask listing bytes 0x08-0x0B, 0x18-0x1B, 0x0C-0x0F and 0x1C-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5069 (define_expand "vec_interleave_lowv4si" | |
5070 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") | |
5071 (vec_select:V4SI | |
5072 (vec_concat:V4SI | |
5073 (vec_select:V2SI | |
5074 (match_operand:V4SI 1 "spu_reg_operand" "r") | |
5075 (parallel [(const_int 2)(const_int 3)])) | |
5076 (vec_select:V2SI | |
5077 (match_operand:V4SI 2 "spu_reg_operand" "r") | |
5078 (parallel [(const_int 2)(const_int 3)]))) | |
5079 (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] | |
5080 | |
5081 "" | |
5082 " | |
5083 { | |
5084 rtx mask = gen_reg_rtx (TImode); | |
5085 unsigned char arr[16] = { | |
5086 0x08, 0x09, 0x0A, 0x0B, | |
5087 0x18, 0x19, 0x1A, 0x1B, | |
5088 0x0C, 0x0D, 0x0E, 0x0F, | |
5089 0x1C, 0x1D, 0x1E, 0x1F}; | |
5090 | |
5091 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5092 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5093 DONE; | |
5094 }") | |
5095 | |
;; Interleave elements 0-3 of V8HI operand 1 with elements 0-3 of V8HI
;; operand 2.  Implemented as one gen_shufb under a constant TImode mask
;; that alternates byte pairs 0x00-0x07 with byte pairs 0x10-0x17.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5096 (define_expand "vec_interleave_highv8hi" | |
5097 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
5098 (vec_select:V8HI | |
5099 (vec_concat:V8HI | |
5100 (vec_select:V4HI | |
5101 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
5102 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])) | |
5103 (vec_select:V4HI | |
5104 (match_operand:V8HI 2 "spu_reg_operand" "r") | |
5105 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) | |
5106 (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5) | |
5107 (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))] | |
5108 | |
5109 "" | |
5110 " | |
5111 { | |
5112 rtx mask = gen_reg_rtx (TImode); | |
5113 unsigned char arr[16] = { | |
5114 0x00, 0x01, 0x10, 0x11, | |
5115 0x02, 0x03, 0x12, 0x13, | |
5116 0x04, 0x05, 0x14, 0x15, | |
5117 0x06, 0x07, 0x16, 0x17}; | |
5118 | |
5119 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5120 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5121 DONE; | |
5122 }") | |
5123 | |
;; Interleave elements 4-7 of V8HI operand 1 with elements 4-7 of V8HI
;; operand 2.  Implemented as one gen_shufb under a constant TImode mask
;; that alternates byte pairs 0x08-0x0F with byte pairs 0x18-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5124 (define_expand "vec_interleave_lowv8hi" | |
5125 [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") | |
5126 (vec_select:V8HI | |
5127 (vec_concat:V8HI | |
5128 (vec_select:V4HI | |
5129 (match_operand:V8HI 1 "spu_reg_operand" "r") | |
5130 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])) | |
5131 (vec_select:V4HI | |
5132 (match_operand:V8HI 2 "spu_reg_operand" "r") | |
5133 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) | |
5134 (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5) | |
5135 (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))] | |
5136 | |
5137 "" | |
5138 " | |
5139 { | |
5140 rtx mask = gen_reg_rtx (TImode); | |
5141 unsigned char arr[16] = { | |
5142 0x08, 0x09, 0x18, 0x19, | |
5143 0x0A, 0x0B, 0x1A, 0x1B, | |
5144 0x0C, 0x0D, 0x1C, 0x1D, | |
5145 0x0E, 0x0F, 0x1E, 0x1F}; | |
5146 | |
5147 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5148 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5149 DONE; | |
5150 }") | |
5151 | |
;; Interleave elements 0-7 of V16QI operand 1 with elements 0-7 of V16QI
;; operand 2.  Implemented as one gen_shufb under a constant TImode mask
;; that alternates byte indices 0x00-0x07 with 0x10-0x17.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5152 (define_expand "vec_interleave_highv16qi" | |
5153 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
5154 (vec_select:V16QI | |
5155 (vec_concat:V16QI | |
5156 (vec_select:V8QI | |
5157 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
5158 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) | |
5159 (const_int 4)(const_int 5)(const_int 6)(const_int 7)])) | |
5160 (vec_select:V8QI | |
5161 (match_operand:V16QI 2 "spu_reg_operand" "r") | |
5162 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) | |
5163 (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) | |
5164 (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9) | |
5165 (const_int 2)(const_int 10)(const_int 3)(const_int 11) | |
5166 (const_int 4)(const_int 12)(const_int 5)(const_int 13) | |
5167 (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))] | |
5168 | |
5169 "" | |
5170 " | |
5171 { | |
5172 rtx mask = gen_reg_rtx (TImode); | |
5173 unsigned char arr[16] = { | |
5174 0x00, 0x10, 0x01, 0x11, | |
5175 0x02, 0x12, 0x03, 0x13, | |
5176 0x04, 0x14, 0x05, 0x15, | |
5177 0x06, 0x16, 0x07, 0x17}; | |
5178 | |
5179 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5180 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5181 DONE; | |
5182 }") | |
5183 | |
;; Interleave elements 8-15 of V16QI operand 1 with elements 8-15 of V16QI
;; operand 2.  Implemented as one gen_shufb under a constant TImode mask
;; that alternates byte indices 0x08-0x0F with 0x18-0x1F.
;; NOTE(review): assumes control bytes 0x00-0x0F index operand 1 and
;; 0x10-0x1F index operand 2 -- confirm against the shufb pattern.
5184 (define_expand "vec_interleave_lowv16qi" | |
5185 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") | |
5186 (vec_select:V16QI | |
5187 (vec_concat:V16QI | |
5188 (vec_select:V8QI | |
5189 (match_operand:V16QI 1 "spu_reg_operand" "r") | |
5190 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) | |
5191 (const_int 12)(const_int 13)(const_int 14)(const_int 15)])) | |
5192 (vec_select:V8QI | |
5193 (match_operand:V16QI 2 "spu_reg_operand" "r") | |
5194 (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) | |
5195 (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))) | |
5196 (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9) | |
5197 (const_int 2)(const_int 10)(const_int 3)(const_int 11) | |
5198 (const_int 4)(const_int 12)(const_int 5)(const_int 13) | |
5199 (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))] | |
5200 | |
5201 "" | |
5202 " | |
5203 { | |
5204 rtx mask = gen_reg_rtx (TImode); | |
5205 unsigned char arr[16] = { | |
5206 0x08, 0x18, 0x09, 0x19, | |
5207 0x0A, 0x1A, 0x0B, 0x1B, | |
5208 0x0C, 0x1C, 0x0D, 0x1D, | |
5209 0x0E, 0x1E, 0x0F, 0x1F}; | |
5210 | |
5211 emit_move_insn (mask, array_to_constant (TImode, arr)); | |
5212 emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); | |
5213 DONE; | |
5214 }") | |
5215 | |
;; Narrow two V8HI vectors to bytes and concatenate them into one V16QI:
;; the truncated elements of operand 1 fill bytes 0-7 of the result and
;; those of operand 2 fill bytes 8-15.
(define_expand "vec_pack_trunc_v8hi"
  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "spu_reg_operand" "r"))
          (truncate:V8QI (match_operand:V8HI 2 "spu_reg_operand" "r"))))]
  ""
{
  /* Select the odd-numbered byte of every halfword, first from
     operand 1 (0x0N) and then from operand 2 (0x1N).  On big-endian
     SPU that is the low-order byte of each element.  */
  unsigned char shuf_bytes[16] = {
    0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
    0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F};
  rtx mask = gen_reg_rtx (TImode);

  /* Materialize the control word in a register, then shuffle.  */
  emit_move_insn (mask, array_to_constant (TImode, shuf_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));

  DONE;
})
5234 | |
;; Narrow two V4SI vectors to halfwords and concatenate them into one
;; V8HI: the truncated elements of operand 1 fill halfwords 0-3 of the
;; result and those of operand 2 fill halfwords 4-7.
(define_expand "vec_pack_trunc_v4si"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "spu_reg_operand" "r"))
          (truncate:V4HI (match_operand:V4SI 2 "spu_reg_operand" "r"))))]
  ""
{
  /* Select bytes 2-3 of every word, first from operand 1 (0x0N) and
     then from operand 2 (0x1N).  On big-endian SPU that is the
     low-order halfword of each element.  */
  unsigned char shuf_bytes[16] = {
    0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
    0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F};
  rtx mask = gen_reg_rtx (TImode);

  /* Materialize the control word in a register, then shuffle.  */
  emit_move_insn (mask, array_to_constant (TImode, shuf_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));

  DONE;
})
5253 | |
;; Copy the stack-protector guard from memory operand 1 to memory
;; operand 0 through scratch register operand 2, then clear the scratch
;; (xor with itself) so the guard value is not left in a register.
;; The sequence is three 4-byte instructions, hence length 12.
;; NOTE(review): %p presumably selects the lq/stq addressing-form suffix
;; for the memory operand; confirm against print_operand in spu.c.
(define_insn "stack_protect_set"
  [(set (match_operand:SI 0 "spu_mem_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "spu_mem_operand" "m")] UNSPEC_SP_SET))
   (set (match_scratch:SI 2 "=&r") (const_int 0))]
  ""
  "lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2"
  [(set_attr "length" "12")
   (set_attr "type" "multi1")]
)
5263 | |
;; Compare the two guard locations (memory operands 0 and 1) with
;; stack_protect_test_si and jump to the label in operand 2 when the
;; comparison result is nonzero.  The ceq used by that insn yields a
;; nonzero result on equality, so the branch is taken when the guards
;; match.
(define_expand "stack_protect_test"
  [(match_operand 0 "spu_mem_operand" "")
   (match_operand 1 "spu_mem_operand" "")
   (match_operand 2 "" "")]
  ""
{
  /* Fresh pseudo that receives the equality mask.  */
  rtx compare_result = gen_reg_rtx (SImode);
  rtx bcomp, loc_ref;

  emit_insn (gen_stack_protect_test_si (compare_result,
					operands[0],
					operands[1]));

  /* Branch condition: compare_result != 0.  */
  bcomp = gen_rtx_NE (SImode, compare_result, const0_rtx);
  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[2]);

  /* (set (pc) (if_then_else bcomp (label_ref op2) (pc)))  */
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
						     loc_ref, pc_rtx)));

  DONE;
})
5289 | |
;; Load memory operands 1 and 2 into operand 0 and scratch operand 3,
;; compare them with ceq (result left in operand 0), then clear the
;; scratch with xor.  Afterwards operand 0 holds only the comparison
;; result and operand 3 is zero, so neither loaded value remains in a
;; register.  Both outputs are earlyclobber because they are written
;; before all inputs have been consumed.
;; The sequence is four 4-byte instructions, hence length 16.
(define_insn "stack_protect_test_si"
  [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
        (unspec:SI [(match_operand:SI 1 "spu_mem_operand" "m")
                    (match_operand:SI 2 "spu_mem_operand" "m")]
                   UNSPEC_SP_TEST))
   (set (match_scratch:SI 3 "=&r") (const_int 0))]
  ""
  "lq%p1\t%0,%1\;lq%p2\t%3,%2\;ceq\t%0,%0,%3\;xor\t%3,%3,%3"
  [(set_attr "length" "16")
   (set_attr "type" "multi1")]
)
5301 |