145
|
1 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
|
|
2
|
|
3 ;; This file is free software; you can redistribute it and/or modify it under
|
|
4 ;; the terms of the GNU General Public License as published by the Free
|
|
5 ;; Software Foundation; either version 3 of the License, or (at your option)
|
|
6 ;; any later version.
|
|
7
|
|
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
|
|
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
11 ;; for more details.
|
|
12
|
|
13 ;; You should have received a copy of the GNU General Public License
|
|
14 ;; along with GCC; see the file COPYING3. If not see
|
|
15 ;; <http://www.gnu.org/licenses/>.
|
|
16
|
|
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
|
|
18
|
|
19 (include "predicates.md")
|
|
20 (include "constraints.md")
|
|
21
|
|
;; {{{ Constants and enums

; Named hard registers.  The numbers here are the target's internal
; register numbering, not the hardware encoding; SGPRs come first,
; then the special scalar registers, then VGPRs.
(define_constants
  [(FIRST_SGPR_REG	 0)
   (CC_SAVE_REG		 22)	; scratch SGPR used to preserve SCC
   (LAST_SGPR_REG	 101)
   (FLAT_SCRATCH_REG	 102)
   (FLAT_SCRATCH_LO_REG	 102)
   (FLAT_SCRATCH_HI_REG	 103)
   (XNACK_MASK_REG	 104)
   (XNACK_MASK_LO_REG	 104)
   (XNACK_MASK_HI_REG	 105)
   (VCC_REG		 106)
   (VCC_LO_REG		 106)
   (VCC_HI_REG		 107)
   (VCCZ_REG		 108)
   (TBA_REG		 109)
   (TBA_LO_REG		 109)
   (TBA_HI_REG		 110)
   (TMA_REG		 111)
   (TMA_LO_REG		 111)
   (TMA_HI_REG		 112)
   (TTMP0_REG		 113)
   (TTMP11_REG		 124)
   (M0_REG		 125)
   (EXEC_REG		 126)
   (EXEC_LO_REG		 126)
   (EXEC_HI_REG		 127)
   (EXECZ_REG		 128)
   (SCC_REG		 129)
   (FIRST_VGPR_REG	 160)
   (LAST_VGPR_REG	 415)])
|
|
55
|
|
; ABI-defined register roles: stack pointer, link register, and the
; (soft) argument and frame pointers, which live above the VGPR range.
(define_constants
  [(SP_REGNUM 16)
   (LR_REGNUM 18)
   (AP_REGNUM 416)
   (FP_REGNUM 418)])
|
|
61
|
|
; Volatile unspecs: operations with side effects that must not be
; deleted, moved, or duplicated by the optimizers.
(define_c_enum "unspecv" [
  UNSPECV_PROLOGUE_USE
  UNSPECV_KERNEL_RETURN
  UNSPECV_BARRIER
  UNSPECV_ATOMIC
  UNSPECV_ICACHE_INV])
|
|
68
|
|
; Non-volatile unspecs: target-specific operations without a direct
; RTL representation (DPP reductions, permutes, gather/scatter, etc.).
(define_c_enum "unspec" [
  UNSPEC_VECTOR
  UNSPEC_BPERMUTE
  UNSPEC_SGPRBASE
  UNSPEC_MEMORY_BARRIER
  UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
  UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
  UNSPEC_PLUS_DPP_SHR
  UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
  UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
  UNSPEC_MOV_FROM_LANE63
  UNSPEC_GATHER
  UNSPEC_SCATTER])
|
|
82
|
|
83 ;; }}}
|
|
84 ;; {{{ Attributes
|
|
85
|
|
86 ; Instruction type (encoding) as described in the ISA specification.
|
|
87 ; The following table summarizes possible operands of individual instruction
|
|
88 ; types and corresponding constraints.
|
|
89 ;
|
|
90 ; sop2 - scalar, two inputs, one output
|
|
91 ; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
|
|
92 ;		vccz,execz,scc,inline immediate,fp inline immediate
|
|
93 ; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
|
|
94 ;
|
|
95 ; Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
|
|
96 ;
|
|
97 ; sopk - scalar, inline constant input, one output
|
|
98 ; simm16: 16bit inline constant
|
|
99 ; sdst: same as sop2/ssrc0
|
|
100 ;
|
|
101 ; Constraints "=SD", "J"
|
|
102 ;
|
|
103 ; sop1 - scalar, one input, one output
|
|
104 ;     ssrc0: same as sop2/ssrc0.  FIXME: manual omits VCCZ
|
|
105 ; sdst: same as sop2/sdst
|
|
106 ;
|
|
107 ; Constraints "=SD", "SSA"
|
|
108 ;
|
|
109 ; sopc - scalar, two inputs, one comparison
|
|
110 ; ssrc0: same as sop2/ssc0.
|
|
111 ;
|
|
112 ; Constraints "SSI,SSA","SSA,SSI"
|
|
113 ;
|
|
114 ; sopp - scalar, one constant input, one special
|
|
115 ; simm16
|
|
116 ;
|
|
117 ; smem - scalar memory
|
|
118 ; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in
|
|
119 ; dwords
|
|
120 ; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
|
|
121 ; offset: sgpr or 20bit unsigned byte offset
|
|
122 ;
|
|
123 ; vop2 - vector, two inputs, one output
|
|
124 ; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
|
|
125 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
|
|
126 ; scc, lds, literal constant, vgpr0-255
|
|
127 ; vsrc1: vgpr0-255
|
|
128 ; vdst: vgpr0-255
|
|
129 ; Limitations: At most one SGPR, at most one constant
|
|
130 ; if constant is used, SGPR must be M0
|
|
131 ; Only SRC0 can be LDS_DIRECT
|
|
132 ;
|
|
133 ; constraints: "=v", "vBSv", "v"
|
|
134 ;
|
|
135 ; vop1 - vector, one input, one output
|
|
136 ; vsrc0: same as vop2/src0
|
|
137 ; vdst: vgpr0-255
|
|
138 ;
|
|
139 ; constraints: "=v", "vBSv"
|
|
140 ;
|
|
141 ; vopc - vector, two inputs, one comparison output;
|
|
142 ; vsrc0: same as vop2/src0
|
|
143 ; vsrc1: vgpr0-255
|
|
144 ; vdst:
|
|
145 ;
|
|
146 ; constraints: "vASv", "v"
|
|
147 ;
|
|
148 ; vop3a - vector, three inputs, one output
|
|
149 ; vdst: vgpr0-255, for v_cmp sgpr or vcc
|
|
150 ; abs,clamp
|
|
151 ; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
|
|
152 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
|
|
153 ; scc, lds_direct
|
|
154 ; FIXME: really missing 1/pi? really 104 SGPRs
|
|
155 ;
|
|
156 ; vop3b - vector, three inputs, one vector output, one scalar output
|
|
157 ; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
|
|
158 ; vdst: vgpr0-255
|
|
159 ; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
|
|
160 ;
|
|
161 ; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
|
|
162 ; src0: vgpr0-255
|
|
163 ; dst_sel: BYTE_0-3, WORD_0-1, DWORD
|
|
164 ; dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
|
|
165 ; clamp: true/false
|
|
166 ; src0_sel: BYTE_0-3, WORD_0-1, DWORD
|
|
167 ; flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
|
|
168 ; src1_abs
|
|
169 ;
|
|
170 ; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
|
|
171 ; src0: vgpr0-255
|
|
172 ; dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
|
|
173 ; wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
|
|
174 ; bcast15, bcast31
|
|
175 ; flags: src0_neg, src0_abs, src1_neg, src1_abs
|
|
176 ; bank_mask: 4-bit mask
|
|
177 ; row_mask: 4-bit mask
|
|
178 ;
|
|
179 ; ds - Local and global data share instructions.
|
|
180 ; offset0: 8-bit constant
|
|
181 ; offset1: 8-bit constant
|
|
182 ; flag: gds
|
|
183 ; addr: vgpr0-255
|
|
184 ; data0: vgpr0-255
|
|
185 ; data1: vgpr0-255
|
|
186 ; vdst: vgpr0-255
|
|
187 ;
|
|
188 ; mubuf - Untyped memory buffer operation. First word with LDS, second word
|
|
189 ; non-LDS.
|
|
190 ; offset: 12-bit constant
|
|
191 ; vaddr: vgpr0-255
|
|
192 ; vdata: vgpr0-255
|
|
193 ; srsrc: sgpr0-102
|
|
194 ; soffset: sgpr0-102
|
|
195 ; flags: offen, idxen, glc, lds, slc, tfe
|
|
196 ;
|
|
197 ; mtbuf - Typed memory buffer operation. Two words
|
|
198 ; offset: 12-bit constant
|
|
199 ; dfmt: 4-bit constant
|
|
200 ; nfmt: 3-bit constant
|
|
201 ; vaddr: vgpr0-255
|
|
202 ; vdata: vgpr0-255
|
|
203 ; srsrc: sgpr0-102
|
|
204 ; soffset: sgpr0-102
|
|
205 ; flags: offen, idxen, glc, lds, slc, tfe
|
|
206 ;
|
|
207 ; flat - flat or global memory operations
|
|
208 ; flags: glc, slc
|
|
209 ; addr: vgpr0-255
|
|
210 ; data: vgpr0-255
|
|
211 ; vdst: vgpr0-255
|
|
212 ;
|
|
213 ; mult - expands to multiple instructions (pseudo encoding)
|
|
214 ;
|
|
215 ; vmult - as mult, when a vector instruction is used.
|
|
216
|
|
; Instruction encoding class, as described in the ISA manual (see the
; big comment above for the operands each class accepts).  "mult" and
; "vmult" are pseudo encodings that expand to several instructions.
(define_attr "type"
	     "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
	      vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
	     (const_string "unknown"))
|
|
221
|
|
; Which execution unit runs the instruction, derived from its type:
; scalar (SALU/SMEM) or vector (VALU/DS/FLAT).
(define_attr "unit" "unknown,scalar,vector"
  (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
	    (const_string "scalar")
	 (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
			  vop_sdwa,vop_dpp,flat,vmult")
	    (const_string "vector")]
	(const_string "unknown")))
|
|
231
|
|
; All vector instructions run as 64 threads, predicated by the EXEC
; register.  Scalar operations in vector registers require a single lane
; enabled, vector moves require a full set of lanes enabled, and most
; vector operations handle the lane masking themselves.
; The md_reorg pass is responsible for ensuring that EXEC is set
; appropriately, according to the following settings:
;   auto   - md_reorg will inspect def/use to determine what to do.
;   none   - EXEC is not needed.
;   single - disable all but lane zero.
;   full   - enable all lanes.
(define_attr "exec" "auto,none,single,full"
  (const_string "auto"))
|
|
245
|
|
; Worst-case instruction length in bytes, inferred from the type.
; Many encodings may be followed by an optional 32-bit immediate word,
; which is included here; "mult" types count as two 64-bit
; instructions.  This is only a fallback default: insn patterns can
; (and do) override it per-alternative for better accuracy.
(define_attr "length" ""
  (cond [(eq_attr "type" "sop1") (const_int 8)
	 (eq_attr "type" "sop2") (const_int 8)
	 (eq_attr "type" "sopk") (const_int 8)
	 (eq_attr "type" "sopc") (const_int 8)
	 (eq_attr "type" "sopp") (const_int 4)
	 (eq_attr "type" "smem") (const_int 8)
	 (eq_attr "type" "ds")   (const_int 8)
	 (eq_attr "type" "vop1") (const_int 8)
	 (eq_attr "type" "vop2") (const_int 8)
	 (eq_attr "type" "vopc") (const_int 8)
	 (eq_attr "type" "vop3a") (const_int 8)
	 (eq_attr "type" "vop3b") (const_int 8)
	 (eq_attr "type" "vop_sdwa") (const_int 8)
	 (eq_attr "type" "vop_dpp") (const_int 8)
	 (eq_attr "type" "flat") (const_int 8)
	 (eq_attr "type" "mult") (const_int 16)
	 (eq_attr "type" "vmult") (const_int 16)]
	(const_int 4)))
|
|
271
|
|
; Disable alternatives that only apply to specific ISA variants.
; An alternative tagged "gcn5" is only enabled when compiling for
; GCN5 or later (TARGET_GCN5_PLUS); "gcn3" alternatives always are.
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))

(define_attr "enabled" ""
  (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
	 (and (eq_attr "gcn_version" "gcn5")
	      (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
	   (const_int 1)]
	(const_int 0)))
|
|
282
|
|
; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".
(define_attr "laneselect" "yes,no" (const_string "no"))

; Identify instructions that require a "Manually Inserted Wait State" if
; their inputs are overwritten by subsequent instructions.
(define_attr "delayeduse" "yes,no" (const_string "no"))
|
|
292
|
|
293 ;; }}}
|
|
;; {{{ Iterators useful across the whole machine description

(define_mode_iterator SIDI [SI DI])
(define_mode_iterator SFDF [SF DF])
(define_mode_iterator SISF [SI SF])
(define_mode_iterator QIHI [QI HI])
(define_mode_iterator DIDF [DI DF])
|
|
301
|
|
302 ;; }}}
|
|
303 ;; {{{ Attributes.
|
|
304
|
|
; Translate RTX codes into GCN instruction mnemonics.  The %i/%b/%B/%u
; print modifiers append the appropriate size/signedness suffix
; (e.g. _i32, _b32, _b64, _u32) at output time.
(define_code_attr mnemonic
  [(minus "sub%i")
   (plus "add%i")
   (ashift "lshl%b")
   (lshiftrt "lshr%b")
   (ashiftrt "ashr%i")
   (and "and%B")
   (ior "or%B")
   (xor "xor%B")
   (mult "mul%i")
   (smin "min%i")
   (smax "max%i")
   (umin "min%u")
   (umax "max%u")
   (not "not%b")
   (popcount "bcnt_u32%b")])
|
|
324
|
|
; As "mnemonic", but without any size suffix; used where the pattern
; supplies the suffix itself.
(define_code_attr bare_mnemonic
  [(plus "add")
   (minus "sub")
   (and "and")
   (ior "or")
   (xor "xor")])
|
|
331
|
|
; Scalar-unit mnemonics for unary operations (the scalar ALU spells
; some of these differently from the vector ALU).
(define_code_attr s_mnemonic
  [(not "not%b")
   (popcount "bcnt1_i32%b")
   (clz "flbit_i32%b")
   (ctz "ff1_i32%b")])
|
|
337
|
|
; Reversed-operand forms, for when the constant/scalar operand has to
; go in the src0 slot of a VOP2 instruction.
(define_code_attr revmnemonic
  [(minus "subrev%i")
   (ashift "lshlrev%b")
   (lshiftrt "lshrrev%b")
   (ashiftrt "ashrrev%i")])
|
|
343
|
|
; Translate RTX codes into the corresponding standard expander name.
(define_code_attr expander
  [(and "and")
   (ior "ior")
   (xor "xor")
   (plus "add")
   (minus "sub")
   (ashift "ashl")
   (lshiftrt "lshr")
   (ashiftrt "ashr")
   (mult "mul")
   (smin "smin")
   (smax "smax")
   (umin "umin")
   (umax "umax")
   (not "one_cmpl")
   (popcount "popcount")
   (clz "clz")
   (ctz "ctz")
   (sign_extend "extend")
   (zero_extend "zero_extend")])
|
|
366
|
|
367 ;; }}}
|
|
368 ;; {{{ Miscellaneous instructions
|
|
369
|
|
; A no-op: s_nop with a zero wait count.
(define_insn "nop"
  [(const_int 0)]
  ""
  "s_nop\t0x0"
  [(set_attr "type" "sopp")])
|
|
375
|
|
; FIXME: What should the value of the immediate be?  Zero is disallowed
; by the ISA, so pick 1 for now.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 0))]
  ""
  "s_trap\t1"
  [(set_attr "type" "sopp")])
|
|
383
|
|
384 ;; }}}
|
|
385 ;; {{{ Moves
|
|
386
|
|
;; All scalar modes we support moves in.
(define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])
|
|
389
|
|
; This is the entry point for creating all kinds of scalar moves,
; including reloads and symbols.
(define_expand "mov<mode>"
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
	(match_operand:MOV_MODE 1 "general_operand"))]
  ""
  {
    /* Stores must come from a register.  */
    if (MEM_P (operands[0]))
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (!lra_in_progress && !reload_completed
	&& !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
      {
	/* Something is probably trying to generate a move
	   which can only work indirectly.
	   E.g. Move from LDS memory to SGPR hardreg
	     or MEM:QI to SGPR.  Route it through a fresh pseudo.  */
	rtx tmpreg = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_mov<mode> (tmpreg, operands[1]));
	emit_insn (gen_mov<mode> (operands[0], tmpreg));
	DONE;
      }

    /* Symbol and label addresses need the PC-relative materialization
       sequence; during LRA we must also preserve SCC.  */
    if (<MODE>mode == DImode
	&& (GET_CODE (operands[1]) == SYMBOL_REF
	    || GET_CODE (operands[1]) == LABEL_REF))
      {
	if (lra_in_progress)
	  emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
	else
	  emit_insn (gen_movdi_symbol (operands[0], operands[1]));
	DONE;
      }
  })
|
|
425
|
|
; Split an invalid move into two valid moves via a fresh pseudo.
(define_split
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
	(match_operand:MOV_MODE 1 "general_operand"))]
  "!reload_completed && !lra_in_progress
   && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))]
  {
    operands[2] = gen_reg_rtx (<MODE>mode);
  })
|
|
438
|
|
; We need a BImode move so we can reload flags registers.
(define_insn "*movbi"
  [(set (match_operand:BI 0 "nonimmediate_operand"
			  "=Sg,   v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
	(match_operand:BI 1 "gcn_load_operand"
			  "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
  ""
  {
    /* SCC as an operand is currently not accepted by the LLVM assembler, so
       we emit bytes directly as a workaround.  */
    switch (which_alternative) {
    case 0:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
	return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x0\;"
	       ".byte\t0x80|%R0\;"
	       ".byte\t0xbe";
      else
	return "s_mov_b32\t%0, %1";
    case 1:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
	return "; v_mov_b32\t%0, %1\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x2\;"
	       ".byte\t((%V0<<1)&0xff)\;"
	       ".byte\t0x7e|(%V0>>7)";
      else
	return "v_mov_b32\t%0, %1";
    case 2:
      return "v_readlane_b32\t%0, %1, 0";
    case 3:
      return "s_cmpk_lg_u32\t%1, 0";
    case 4:
      return "v_cmp_ne_u32\tvcc, 0, %1";
    case 5:
      if (REGNO (operands[1]) == SCC_REG)
	return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x0\;"
	       ".byte\t0xea\;"
	       ".byte\t0xbe\;"
	       "s_mov_b32\tvcc_hi, 0";
      else
	return "s_mov_b32\tvcc_lo, %1\;"
	       "s_mov_b32\tvcc_hi, 0";
    case 6:
      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
    case 7:
      return "s_store_dword\t%1, %A0";
    case 8:
      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
    case 9:
      return "flat_store_dword\t%A0, %1%O0%g0";
    case 10:
      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
    case 11:
      return "global_store_dword\t%A0, %1%O0%g0";
    default:
      gcc_unreachable ();
    }
  }
  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
		     flat,flat")
   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
|
|
506
|
|
; 32-bit move pattern.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SISF 0 "nonimmediate_operand"
		  "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
	(match_operand:SISF 1 "gcn_load_operand"
		  "SSA, J, B,RB,Sm,RS,Sm,v,  v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
  ""
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
  s_buffer_store%s1\t%1, s[0:3], %0
  s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  s_store_dword\t%1, %A0
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dword\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write_b32\t%A0, %1%O0
  ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  s_mov_b32\t%0, %1
  global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dword\t%A0, %1%O0%g0"
  [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
		     flat,vop1,ds,ds,sop1,flat,flat")
   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
|
|
538
|
|
; 8/16-bit move pattern.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QIHI 0 "nonimmediate_operand"
				 "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG,   v, v,RM")
	(match_operand:QIHI 1 "gcn_load_operand"
				 "SSA, J, B,v,  v,Sv,RF, v,B,   v,RLRG,RM, v"))]
  "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store%s0\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write%b0\t%A0, %1%O0
  ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store%s0\t%A0, %1%O0%g0"
  [(set_attr "type"
	     "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
|
|
565
|
|
; 64-bit move pattern.  Register-to-register moves that cannot be done
; in one hardware instruction, and unencodable constants, are split
; into two 32-bit moves after reload.
(define_insn_and_split "*mov<mode>_insn"
  [(set (match_operand:DIDF 0 "nonimmediate_operand"
			  "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG,   v, v,RM")
	(match_operand:DIDF 1 "general_operand"
			  "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v,   v,RLRG,RM, v"))]
  "GET_CODE(operands[1]) != SYMBOL_REF"
  "@
  s_mov_b64\t%0, %1
  s_mov_b64\t%0, %1
  #
  s_store_dwordx2\t%1, %A0
  s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  #
  #
  #
  #
  flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dwordx2\t%A0, %1%O0%g0
  ds_write_b64\t%A0, %1%O0
  ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dwordx2\t%A0, %1%O0%g0"
  "(reload_completed && !MEM_P (operands[0]) && !MEM_P (operands[1])
    && !gcn_sgpr_move_p (operands[0], operands[1]))
   || (GET_CODE (operands[1]) == CONST_INT && !gcn_constant64_p (operands[1]))"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
  {
    rtx inlo = gen_lowpart (SImode, operands[1]);
    rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
    rtx outlo = gen_lowpart (SImode, operands[0]);
    rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);

    /* Ensure that overlapping registers aren't corrupted: if the low
       output would clobber the high input, move the high halves first.  */
    if (REGNO (outlo) == REGNO (inhi))
      {
	operands[0] = outhi;
	operands[1] = inhi;
	operands[2] = outlo;
	operands[3] = inlo;
      }
    else
      {
	operands[0] = outlo;
	operands[1] = inlo;
	operands[2] = outhi;
	operands[3] = inhi;
      }
  }
  [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
		     flat,ds,ds,flat,flat")
   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
|
|
620
|
|
; 128-bit move.  Register and constant forms are split into four
; 32-bit moves after reload.
(define_insn_and_split "*movti_insn"
  [(set (match_operand:TI 0 "nonimmediate_operand"
				      "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
	(match_operand:TI 1 "general_operand"  
				      "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
  ""
  "@
  #
  s_store_dwordx4\t%1, %A0
  s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  flat_store_dwordx4\t%A0, %1%O0%g0
  flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
  #
  #
  #
  global_store_dwordx4\t%A0, %1%O0%g0
  global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  ds_write_b128\t%A0, %1%O0
  ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
  "reload_completed
   && REG_P (operands[0])
   && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
  {
    /* Extract the high parts first, before operands[0]/[1] are
       overwritten with their own low parts.  */
    operands[6] = gcn_operand_part (TImode, operands[0], 3);
    operands[7] = gcn_operand_part (TImode, operands[1], 3);
    operands[4] = gcn_operand_part (TImode, operands[0], 2);
    operands[5] = gcn_operand_part (TImode, operands[1], 2);
    operands[2] = gcn_operand_part (TImode, operands[0], 1);
    operands[3] = gcn_operand_part (TImode, operands[1], 1);
    operands[0] = gcn_operand_part (TImode, operands[0], 0);
    operands[1] = gcn_operand_part (TImode, operands[1], 0);
  }
  [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
		     ds,ds")
   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
|
|
663
|
|
664 ;; }}}
|
|
665 ;; {{{ Prologue/Epilogue
|
|
666
|
|
; A zero-length marker that keeps prologue-set registers live so the
; optimizers do not delete the instructions that set them.
(define_insn "prologue_use"
  [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
  ""
  ""
  [(set_attr "length" "0")])
|
|
672
|
|
; Emit the function prologue; all the work is done in the backend C code.
(define_expand "prologue"
  [(const_int 0)]
  ""
  {
    gcn_expand_prologue ();
    DONE;
  })
|
|
680
|
|
; Emit the function epilogue; all the work is done in the backend C code.
(define_expand "epilogue"
  [(const_int 0)]
  ""
  {
    gcn_expand_epilogue ();
    DONE;
  })
|
|
688
|
|
689 ;; }}}
|
|
690 ;; {{{ Control flow
|
|
691
|
|
; This pattern must satisfy simplejump_p, which means it cannot be a parallel
; that clobbers SCC.  Thus, we must preserve SCC if we're generating a long
; branch sequence.
(define_insn "jump"
  [(set (pc)
	(label_ref (match_operand 0)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_branch\t%0";
    else
      /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG.  */
      return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	     ".long\t0xbe9600fd\;"
	     "s_getpc_b64\ts[20:21]\;"
	     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
	     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
	     "s_cmpk_lg_u32\ts22, 0\;"
	     "s_setpc_b64\ts[20:21]";
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
	(if_then_else (and (ge (minus (match_dup 0) (pc))
			       (const_int -131072))
			   (lt (minus (match_dup 0) (pc))
			       (const_int 131072)))
		      (const_int 4)
		      (const_int 32)))])
|
|
721
|
|
; Jump to an address held in an SGPR pair.
(define_insn "indirect_jump"
  [(set (pc)
	(match_operand:DI 0 "register_operand" "Sg"))]
  ""
  "s_setpc_b64\t%0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])
|
|
729
|
|
; Conditional branch.  Short-range branches use a single s_cbranch; the
; long-range form branches around an unconditional far-jump sequence,
; taking care to restore SCC (which s_add/s_addc clobber) on the way.
(define_insn "cjump"
  [(set (pc)
	(if_then_else
	  (match_operator:BI 1 "gcn_conditional_operator"
	    [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
	     (const_int 0)])
	  (label_ref (match_operand 0))
	  (pc)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_cbranch%C1\t%0";
    else
      {
	/* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
	   restores SCC.  */
	if (REGNO (operands[2]) == SCC_REG)
	  {
	    /* The condition register IS SCC, so instead of saving it we
	       can reconstruct its known-at-this-point value with a
	       compare of constants.  */
	    if (GET_CODE (operands[1]) == EQ)
	      return "s_cbranch%c1\t.Lskip%=\;"
		     "s_getpc_b64\ts[20:21]\;"
		     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		     "s_cmp_lg_u32\t0, 0\;"
		     "s_setpc_b64\ts[20:21]\n"
		     ".Lskip%=:";
	    else
	      return "s_cbranch%c1\t.Lskip%=\;"
		     "s_getpc_b64\ts[20:21]\;"
		     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		     "s_cmp_eq_u32\t0, 0\;"
		     "s_setpc_b64\ts[20:21]\n"
		     ".Lskip%=:";
	  }
	else
	  return "s_cbranch%c1\t.Lskip%=\;"
		 "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
		 ".byte\t0xfd\;"
		 ".byte\t0x0\;"
		 ".byte\t0x80|22\;"
		 ".byte\t0xbe\;"
		 "s_getpc_b64\ts[20:21]\;"
		 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		 "s_cmpk_lg_u32\ts22, 0\;"
		 "s_setpc_b64\ts[20:21]\n"
		 ".Lskip%=:";
      }
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
	(if_then_else (and (ge (minus (match_dup 0) (pc))
			       (const_int -131072))
			   (lt (minus (match_dup 0) (pc))
			       (const_int 131072)))
		      (const_int 4)
		      (const_int 36)))])
|
|
788
|
|
; Returning from a normal function is different to returning from a
; kernel function: normal functions return through the link register,
; kernels flush caches and end the program.
(define_insn "gcn_return"
  [(return)]
  ""
  {
    if (cfun && cfun->machine && cfun->machine->normal_function)
      return "s_setpc_b64\ts[18:19]";
    else
      return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
  }
  [(set_attr "type" "sop1")
   (set_attr "length" "12")])
|
|
803
|
|
; Standard "call" expander; the parallel matches either the direct or
; the indirect call insn below.
(define_expand "call"
  [(parallel [(call (match_operand 0 "")
		    (match_operand 1 ""))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 2))])]
  ""
  {})
|
|
811
|
|
; Direct call.  Symbolic targets ("Y") need a four-instruction
; PC-relative sequence through a scratch SGPR pair; immediate-encodable
; targets ("B") are a single s_swappc_b64.
(define_insn "gcn_simple_call"
  [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
	 (match_operand 1 "const_int_operand"))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
  s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])
|
|
823
|
|
; Materialize a symbol or label address PC-relatively.  Weak symbols
; may be undefined, so their address must come from the GOT instead of
; a direct rel32 offset (which could not encode "NULL").
(define_insn "movdi_symbol"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
	(match_operand:DI 1 "general_operand" "Y"))
   (clobber (reg:BI SCC_REG))]
  "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
  {
    if (SYMBOL_REF_P (operands[1])
	&& SYMBOL_REF_WEAK (operands[1]))
	return "s_getpc_b64\t%0\;"
	       "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	       "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	       "s_load_dwordx2\t%0, %0\;"
	       "s_waitcnt\tlgkmcnt(0)";

    return "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "32")])
|
|
844
|
|
; As movdi_symbol, but saves and restores SCC around the sequence, for
; use during LRA when SCC may be live across the move.
(define_insn "movdi_symbol_save_scc"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
	(match_operand:DI 1 "general_operand" "Y"))
   (clobber (reg:BI CC_SAVE_REG))]
  "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
   && (lra_in_progress || reload_completed)"
  {
    /* !!! These sequences clobber CC_SAVE_REG.  */

    if (SYMBOL_REF_P (operands[1])
	&& SYMBOL_REF_WEAK (operands[1]))
	return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	       ".long\t0xbe9600fd\;"
	       "s_getpc_b64\t%0\;"
	       "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	       "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	       "s_load_dwordx2\t%0, %0\;"
	       "s_cmpk_lg_u32\ts22, 0\;"
	       "s_waitcnt\tlgkmcnt(0)";

    return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	   ".long\t0xbe9600fd\;"
	   "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
	   "s_cmpk_lg_u32\ts22, 0";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "40")])
|
|
874
|
|
875
|
|
; Indirect call through an SGPR pair.
(define_insn "gcn_indirect_call"
  [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
	 (match_operand 1 "" ""))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])
|
|
885
|
|
; Standard "call_value" expander; the parallel matches either the
; direct or the indirect value-returning call insn below.
(define_expand "call_value"
  [(parallel [(set (match_operand 0 "")
		   (call (match_operand 1 "")
			 (match_operand 2 "")))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 3))])]
  ""
  {})
|
|
894
|
|
; Value-returning direct call; alternatives mirror gcn_simple_call.
; Fix: the attributes previously claimed "sop1"/"24" for both
; alternatives, but alternative 0 is a four-instruction PC-relative
; sequence (type "mult", 24 bytes) and alternative 1 is a single
; 4-byte s_swappc_b64, exactly as in gcn_simple_call.
(define_insn "gcn_call_value"
  [(set (match_operand 0 "register_operand" "=Sg,Sg")
	(call (mem (match_operand 1 "immediate_operand" "Y,B"))
	      (match_operand 2 "const_int_operand")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
  s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])
|
|
907
|
|
; Value-returning indirect call through an SGPR pair.
(define_insn "gcn_call_value_indirect"
  [(set (match_operand 0 "register_operand" "=Sg")
	(call (mem (match_operand:DI 1 "register_operand" "Sg"))
	      (match_operand 2 "" "")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])
|
|
918
|
|
919 ; GCN does not have an instruction to clear only part of the instruction
|
|
920 ; cache, so the operands are ignored.
|
|
921
|
|
922 (define_insn "clear_icache"
|
|
923 [(unspec_volatile
|
|
924 [(match_operand 0 "") (match_operand 1 "")]
|
|
925 UNSPECV_ICACHE_INV)]
|
|
926 ""
|
|
927 "s_icache_inv"
|
|
928 [(set_attr "type" "sopp")
|
|
929 (set_attr "length" "4")])
|
|
930
|
|
931 ;; }}}
|
|
;; {{{ Conditionals

; 32-bit compare, scalar unit only

; Scalar 32-bit compare setting SCC.  %D1 prints the comparison code;
; the sopk alternatives use the 16-bit-immediate s_cmpk form.
(define_insn "cstoresi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand"
							 "=cs, cs, cs, cs")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:SI 2 "gcn_alu_operand"     "SSA,SSA,SSB, SS")
	   (match_operand:SI 3 "gcn_alu_operand"     "SSA,SSL, SS,SSB")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   s_cmpk%D1\t%2, %3
   s_cmp%D1\t%2, %3
   s_cmp%D1\t%2, %3"
  [(set_attr "type" "sopc,sopk,sopk,sopk")
   (set_attr "length" "4,4,8,8")])

; Conditional branch on a 32-bit compare: emit a cstoresi4 into a fresh
; BImode condition register, then a cjump on that register being nonzero.
(define_expand "cbranchsi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:SI 1 "gcn_alu_operand")
      (match_operand:SI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; 64-bit compare; either unit, but scalar allows limited operators

; Placeholder expander; matching is done by the insns below.
(define_expand "cstoredi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand")
	   (match_operand:DI 3 "gcn_alu_operand")]))]
  ""
  {})

; 64-bit compare restricted to the operators the scalar unit supports
; (gcn_compare_64bit_operator); alternative 0 sets SCC via s_cmp,
; alternative 1 sets VCC via v_cmp (%E1 prints the vector form of the
; comparison code).
(define_insn "cstoredi4_vec_and_scalar"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
	(match_operator:BI 1 "gcn_compare_64bit_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	       "%SSA,vSvC")
	   (match_operand:DI 3 "gcn_alu_operand"	       " SSC,   v")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "sopc,vopc")
   (set_attr "length" "8")])

; Full set of 64-bit comparison operators, vector unit only (VCC result).
(define_insn "cstoredi4_vector"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	       "vSvB")
	   (match_operand:DI 3 "gcn_alu_operand"	       "   v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

; Conditional branch on a 64-bit compare; same compare-then-cjump scheme
; as cbranchsi4.
(define_expand "cbranchdi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:DI 1 "gcn_alu_operand")
      (match_operand:DI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; FP compare; vector unit only

; Floating-point compare (SF or DF via the SFDF iterator) setting VCC.
(define_insn "cstore<mode>4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
	(match_operator:BI 1 "gcn_fp_compare_operator"
	  [(match_operand:SFDF 2 "gcn_alu_operand"	       "vB")
	   (match_operand:SFDF 3 "gcn_alu_operand"	       " v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

; Conditional branch on an FP compare; compare-then-cjump as above.
(define_expand "cbranch<mode>4"
  [(match_operator 0 "gcn_fp_compare_operator"
     [(match_operand:SFDF 1 "gcn_alu_operand")
      (match_operand:SFDF 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

;; }}}
|
|
;; {{{ ALU special cases: Plus

; 32-bit add.  Scalar alternatives clobber SCC (BI scratch), the vector
; alternative clobbers VCC (DI scratch); the clobbers are X for the
; units not used.  %^ selects the instruction-name variant for the
; target ISA level.
(define_insn "addsi3"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg, Sg, Sg,   v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA,  0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ,  B,vBSv")))
   (clobber (match_scratch:BI 3			       "= cs, cs, cs,   X"))
   (clobber (match_scratch:DI 4			       "=  X,  X,  X,  cV"))]
  ""
  "@
   s_add_i32\t%0, %1, %2
   s_addk_i32\t%0, %2
   s_add_i32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,sopk,sop2,vop2")
   (set_attr "length" "4,4,8,8")])

; Convenience expander for an add that explicitly clobbers SCC;
; matches the addsi3 parallel above.
(define_expand "addsi3_scc"
  [(parallel [(set (match_operand:SI 0 "register_operand")
		   (plus:SI (match_operand:SI 1 "gcn_alu_operand")
			    (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})

; Having this as an insn_and_split allows us to keep together DImode adds
; through some RTL optimisation passes, and means the CC reg we set isn't
; dependent on the constraint alternative (which doesn't seem to work well).

; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).

; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
; used as an operand due to the read of VCC, so we restrict constants to the
; inlinable range for that alternative.

; 64-bit add, split after reload into a low-part add-setting-carry and a
; high-part add-with-carry.  The carry register is VCC for vector
; destinations and SCC for scalar ones.
(define_insn_and_split "adddi3"
  [(set (match_operand:DI 0 "register_operand"
					    "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
	(plus:DI (match_operand:DI 1 "register_operand"
					    " Sg, 0, 0, Sg, v, 0, 0, v")
		 (match_operand:DI 2 "nonmemory_operand"
					    " 0,SgB, 0,SgB, 0,vA, 0,vA")))
   (clobber (match_scratch:BI 3	     "= cs, cs, cs, cs, X, X, X, X"))
   (clobber (match_scratch:DI 4	     "= X, X, X, X,cV,cV,cV,cV"))]
  ""
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    /* Carry goes in VCC for vector registers, SCC for scalars.  */
    rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
							     DImode)
			  ? VCC_REG : SCC_REG);

    emit_insn (gen_addsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0),
		cc));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    /* Use the cheaper carry-only form when the high part is zero.  */
    if (val != const0_rtx)
      emit_insn (gen_addcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1),
		  cc, cc));
    else
      emit_insn (gen_addcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  cc));
    DONE;
  }
  [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
   (set_attr "length" "8")])

; Convenience expander for a 64-bit add clobbering SCC; matches the
; adddi3 parallel above.
(define_expand "adddi3_scc"
  [(parallel [(set (match_operand:DI 0 "register_operand")
		   (plus:DI (match_operand:DI 1 "register_operand")
			    (match_operand:DI 2 "nonmemory_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})

;; Add with carry.

; Low-part add that also produces the carry-out: the unsigned overflow
; test (a + b) <u a is written as an ltu into operand 3 (SCC or VCC).
(define_insn "addsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
   (set (match_operand:BI 3 "register_operand"	       "= cs,cV")
	(ltu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,8")])

; As addsi3_scalar_carry, but for a constant addend the overflow test is
; (a + n) >=u -n, hence the geu against operand 3 == -operand 2.
(define_insn "addsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"	       "=Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
		 (match_operand:SI 2 "const_int_operand" " n, n")))
   (set (match_operand:BI 4 "register_operand"	       "=cs,cV")
	(geu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_operand:SI 3 "const_int_operand" " n, n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; High-part add with carry-in and carry-out.  The carry-out expression
; is the ior of the two places overflow can occur (adding the carry to
; operand 1, and adding operand 2 to that sum).  Operands 3 and 4 are
; tied ("3") so the same CC register is read and written.
(define_insn "addcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"		   "= Sg, v")
	(plus:SI (plus:SI (zero_extend:SI
			    (match_operand:BI 3 "register_operand" "= cs,cV"))
			  (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
		 (match_operand:SI 2 "gcn_alu_operand"		" SgB,vA")))
   (set (match_operand:BI 4 "register_operand"			"= 3, 3")
	(ior:BI (ltu:BI (plus:SI
			  (plus:SI
			    (zero_extend:SI (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   s_addc_u32\t%0, %1, %2
   v_addc%^_u32\t%0, vcc, %2, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,4")])

; High-part add with carry-in when the high part of the addend is zero;
; only the carry itself is added.
(define_insn "addcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		  "=Sg, v")
	(plus:SI (zero_extend:SI
		   (match_operand:BI 2 "register_operand" "=cs,cV"))
		 (match_operand:SI 1 "gcn_alu_operand"	  "SgA, v")))
   (set (match_dup 2)
	(ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
			 (match_dup 1))
		(match_dup 1)))]
  ""
  "@
   s_addc_u32\t%0, %1, 0
   v_addc%^_u32\t%0, vcc, 0, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; "addptr" is the same as "add" except that it must not write to VCC or SCC
; as a side-effect.  Unfortunately GCN does not have a suitable instruction
; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
; Note that it is not safe to save/clobber/restore SCC because doing so will
; break data-flow analysis, so this must use vector registers.

(define_insn "addptrdi3"
  [(set (match_operand:DI 0 "register_operand"		 "= &v")
	(plus:DI (match_operand:DI 1 "register_operand"	 "  v0")
		 (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
  ""
  {
    /* Operand 3 is the CC_SAVE_REG pair used as the carry temporary so
       that VCC itself is left untouched.  */
    rtx new_operands[4] = { operands[0], operands[1], operands[2],
			    gen_rtx_REG (DImode, CC_SAVE_REG) };

    output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands);
    output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands);

    return "";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

;; }}}
|
|
;; {{{ ALU special cases: Minus

; 32-bit subtract.  Scalar alternatives clobber SCC, vector alternatives
; clobber VCC; v_subrev is used when the subtrahend is the register
; operand so the constant/scalar source can sit in src0.
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand"	        "=Sg, Sg,    v,   v")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA,    v,vBSv")
		  (match_operand:SI 2 "gcn_alu_operand" "SgA,  B, vBSv,   v")))
   (clobber (match_scratch:BI 3				"=cs, cs,    X,   X"))
   (clobber (match_scratch:DI 4				"= X,  X,   cV,  cV"))]
  ""
  "@
   s_sub_i32\t%0, %1, %2
   s_sub_i32\t%0, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1
   v_sub%^_u32\t%0, vcc, %1, %2"
  [(set_attr "type" "sop2,sop2,vop2,vop2")
   (set_attr "length" "4,8,8,8")])

; 64-bit subtract (scalar only), split after reload into a low-part
; subtract-setting-borrow and a high-part subtract-with-borrow in SCC.
(define_insn_and_split "subdi3"
  [(set (match_operand:DI 0 "register_operand"	      "=Sg, Sg")
	(minus:DI
	  (match_operand:DI 1 "gcn_alu_operand"	      "SgA,SgB")
	  (match_operand:DI 2 "gcn_alu_operand"	      "SgB,SgA")))
   (clobber (reg:BI SCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    emit_insn (gen_subsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0)));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    /* Use the cheaper borrow-only form when the high part is zero.  */
    if (val != const0_rtx)
      emit_insn (gen_subcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1)));
    else
      emit_insn (gen_subcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1)));
    DONE;
  }
  [(set_attr "length" "8")])

; Low-part subtract that produces the borrow-out in SCC: the unsigned
; underflow test (a - b) >u a is written as a gtu.
(define_insn "subsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"	        "=Sg, Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
		  (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_dup 1)))]
  ""
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; As subsi3_scalar_carry, but for a constant subtrahend the underflow
; test is (a - n) <=u -n, hence the leu against operand 3 == -operand 2.
(define_insn "subsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"	         "=Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand"	 "SgA")
		  (match_operand:SI 2 "const_int_operand" " n")))
   (set (reg:BI SCC_REG)
	(leu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_operand:SI 3 "const_int_operand"	 " n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

; High-part subtract with borrow-in and borrow-out in SCC.  The
; borrow-out is the ior of the two places underflow can occur.
(define_insn "subcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"	        "=Sg, Sg")
	(minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
			    (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
		  (match_operand:SI 2 "gcn_alu_operand"		  "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
					    (match_dup 1))
				  (match_dup 2))
			(match_dup 1))
		(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
				  (match_dup 1))
			(match_dup 1))))]
  ""
  "s_subb_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; High-part subtract with borrow-in when the subtrahend's high part is
; zero; only the borrow itself is subtracted.
(define_insn "subcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		"=Sg")
	(minus:SI (zero_extend:SI (reg:BI SCC_REG))
		  (match_operand:SI 1 "gcn_alu_operand" "SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
		(match_dup 1)))]
  ""
  "s_subb_u32\t%0, %1, 0"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

;; }}}
|
|
;; {{{ ALU: mult

; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
; immediate.
; 32-bit multiply; scalar forms include the 16-bit-immediate s_mulk.
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg,Sg, Sg,   v")
	(mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA, J,  B,vASv")))]
  ""
  "@
   s_mul_i32\t%0, %1, %2
   s_mulk_i32\t%0, %2
   s_mul_i32\t%0, %1, %2
   v_mul_lo_i32\t%0, %1, %2"
  [(set_attr "type" "sop2,sopk,sop2,vop3a")
   (set_attr "length" "4,4,8,4")])

; Iterator/attributes used to generate signed and unsigned variants of
; the widening multiply patterns below from a single template.
(define_code_iterator any_extend [sign_extend zero_extend])
(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")])
(define_code_attr iu [(sign_extend "i") (zero_extend "u")])
(define_code_attr e [(sign_extend "e") (zero_extend "")])

; High 32 bits of a 32x32->64 multiply (signed or unsigned), vector
; unit only (v_mul_hi_i32 / v_mul_hi_u32).
(define_insn "<su>mulsi3_highpart"
  [(set (match_operand:SI 0 "register_operand"	       "= v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "% v"))
	      (any_extend:DI
		(match_operand:SI 2 "register_operand" "vSv")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; HImode x HImode -> SImode widening multiply using the 24-bit multiply
; with SDWA operand selection of the low 16 bits of each source.
(define_insn "<u>mulhisi3"
  [(set (match_operand:SI 0 "register_operand"			"=v")
	(mult:SI
	  (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
	  (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

; QImode x QImode -> HImode widening multiply, selecting the low byte of
; each source via SDWA.
(define_insn "<u>mulqihi3_scalar"
  [(set (match_operand:HI 0 "register_operand"			"=v")
	(mult:HI
	  (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
	  (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

;; }}}
|
|
;; {{{ ALU: generic 32-bit unop

; One-operand bit operations; popcount needs an extra ", 0" source on
; the vector form (v_bcnt_u32_b32 adds to its second operand).
(define_code_iterator bitunop [not popcount])
(define_code_attr popcount_extra_op [(not "") (popcount ", 0")])

; NOT / POPCOUNT on either unit; the scalar form clobbers SCC.
(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"	"=Sg,   v")
	(bitunop:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
   (clobber (match_scratch:BI 2			"=cs,   X"))]
  ""
  "@
   s_<s_mnemonic>0\t%0, %1
   v_<mnemonic>0\t%0, %1<popcount_extra_op>"
  [(set_attr "type" "sop1,vop1")
   (set_attr "length" "8")])

(define_code_iterator countzeros [clz ctz])

; CLZ / CTZ, scalar unit only; second alternative allows a long
; immediate source.
(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"	"=Sg,Sg")
	(countzeros:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

; The truncate ensures that a constant passed to operand 1 is treated as DImode
(define_insn "<expander>di2"
  [(set (match_operand:SI 0 "register_operand"	  "=Sg,Sg")
	(truncate:SI
	  (countzeros:DI
	    (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

;; }}}
|
|
;; {{{ ALU: generic 32-bit binop

; No plus and mult - they have variant with 16bit immediate
; and thus are defined later.
(define_code_iterator binop [and ior xor smin smax umin umax
			     ashift lshiftrt ashiftrt])
(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])

; Commutative 32-bit binops on scalar unit, vector unit, or directly on
; LDS memory (the "RD"/ds alternative reads and writes operand 0 in
; place); the scalar alternative clobbers SCC.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "gcn_valu_dst_operand"    "= Sg,   v,RD")
	(vec_and_scalar_com:SI
	  (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
	  (match_operand:SI 2 "gcn_alu_operand"	      " SgB,   v, v")))
   (clobber (match_scratch:BI 3			      "= cs,   X, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   v_<mnemonic>0\t%0, %1, %2
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "sop2,vop2,ds")
   (set_attr "length" "8")])

; Non-commutative 32-bit binops (shifts).  The vector alternative uses
; the operand-reversed mnemonic so the shift amount can be src0.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "register_operand"  "=Sg, Sg,   v")
	(vec_and_scalar_nocom:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
   (clobber (match_scratch:BI 3			"=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

; Convenience expander for any binop with an explicit SCC clobber;
; matches the parallels above.
(define_expand "<expander>si3_scc"
  [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
		   (binop:SI
		     (match_operand:SI 1 "gcn_valu_src0_operand")
		     (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))])]
  ""
  {})

;; }}}
|
|
;; {{{ ALU: generic 64-bit

(define_code_iterator vec_and_scalar64_com [and ior xor])

; 64-bit logical ops.  The scalar unit does these in one instruction;
; vector destinations are split after reload into two 32-bit ops on the
; low and high halves (hence the early-clobber "&v" alternatives).
(define_insn_and_split "<expander>di3"
   [(set (match_operand:DI 0 "register_operand"  "= Sg,   &v,   &v")
	 (vec_and_scalar64_com:DI
	  (match_operand:DI 1 "gcn_alu_operand"  "%SgA,vSvDB,vSvDB")
	   (match_operand:DI 2 "gcn_alu_operand" " SgC,    v,    0")))
   (clobber (match_scratch:BI 3			 "= cs,    X,    X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   #
   #"
  "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 4)
		   (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
	      (clobber (match_dup 3))])
   (parallel [(set (match_dup 7)
		   (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
	      (clobber (match_dup 3))])]
  {
    /* Split into independent low-part and high-part SImode operations.  */
    operands[4] = gcn_operand_part (DImode, operands[0], 0);
    operands[5] = gcn_operand_part (DImode, operands[1], 0);
    operands[6] = gcn_operand_part (DImode, operands[2], 0);
    operands[7] = gcn_operand_part (DImode, operands[0], 1);
    operands[8] = gcn_operand_part (DImode, operands[1], 1);
    operands[9] = gcn_operand_part (DImode, operands[2], 1);
  }
  [(set_attr "type" "sop2,vop2,vop2")
   (set_attr "length" "8")])

; 64-bit shifts; the shift amount is SImode.  As with the 32-bit form,
; the vector alternative uses the reversed mnemonic so the amount can
; be src0.
(define_insn "<expander>di3"
  [(set (match_operand:DI 0 "register_operand"	"=Sg, Sg,   v")
	(vec_and_scalar_nocom:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
   (clobber (match_scratch:BI 3			"=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

;; }}}
|
|
;; {{{ Atomics

; Each compute unit has its own L1 cache.  The L2 cache is shared between
; all the compute units.  Any load or store instruction can skip L1 and
; access L2 directly using the "glc" flag.  Atomic instructions also skip
; L1.  The L1 cache can be flushed and invalidated using instructions.
;
; Therefore, in order for "acquire" and "release" atomic modes to work
; correctly across compute units we must flush before each "release"
; and invalidate the cache after each "acquire".  It might seem like
; invalidation could be safely done before an "acquire", but since each
; compute unit can run up to 40 threads simultaneously, all reading values
; into the L1 cache, this is not actually safe.
;
; Additionally, scalar flat instructions access L2 via a different cache
; (the "constant cache"), so they have separate control instructions.  We
; do not attempt to invalidate both caches at once; instead, atomics
; operating on scalar flat pointers will flush the constant cache, and
; atomics operating on flat or global pointers will flush L1.  It is up to
; the programmer to get this right.

(define_code_iterator atomicops [plus minus and ior xor])
; <X> suffixes the "_X2" (64-bit) form of the atomic mnemonics.
(define_mode_attr X [(SI "") (DI "_X2")])

;; TODO compare_and_swap test_and_set inc dec
;; Hardware also supports min and max, but GCC does not.

; Full memory barrier: a volatile BLK unspec matched by *memory_barrier
; below.
(define_expand "memory_barrier"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })

; Emit the L1 write-back/invalidate for the barrier.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  "buffer_wbinvl1_vol"
  [(set_attr "type" "mubuf")
   (set_attr "length" "4")])

; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
; Atomic fetch-and-op returning the old value; alternatives are scalar
; (RS), flat (RF) and global (RM) address spaces.  Condition is "0", so
; the pattern can never match (see FIXME above).
(define_insn "atomic_fetch_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "register_operand"     "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"	     "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	    (match_dup 1)
	    (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
	   UNSPECV_ATOMIC))
   (use (match_operand 3 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; FIXME: These patterns are disabled because the instructions don't
; seem to work as advertised.  Specifically, OMP "team distribute"
; reductions apparently "lose" some of the writes, similar to what
; you might expect from a concurrent non-atomic read-modify-write.
; TODO: flush caches according to memory model
; Atomic op with no result; also disabled via a "0" condition.
(define_insn "atomic_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "memory_operand"	     "+RS,RF,RM")
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	    (match_dup 0)
	    (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
	  UNSPECV_ATOMIC))
   (use (match_operand 2 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; Helpers for the compare-and-swap patterns: the double-width mode that
; holds the {swap,compare} pair, and size/width attributes.
(define_mode_attr x2 [(SI "DI") (DI "TI")])
(define_mode_attr size [(SI "4") (DI "8")])
(define_mode_attr bitsize [(SI "32") (DI "64")])

; Compare-and-swap.  LDS memory uses a dedicated ds_cmpst instruction;
; other address spaces need the compare and swap values packed into one
; double-width register pair (swap in the low half, compare in the
; high half) for the cmpswap instruction.
(define_expand "sync_compare_and_swap<mode>"
  [(match_operand:SIDI 0 "register_operand")
   (match_operand:SIDI 1 "memory_operand")
   (match_operand:SIDI 2 "register_operand")
   (match_operand:SIDI 3 "register_operand")]
  ""
  {
    if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
      {
	emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
							     operands[1],
							     operands[2],
							     operands[3]));
	DONE;
      }

    /* Operands 2 and 3 must be placed in consecutive registers, and passed
       as a combined value.  */
    rtx src_cmp = gen_reg_rtx (<x2>mode);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
    emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
						     operands[1],
						     src_cmp));
    DONE;
  })

; Compare-and-swap on scalar/flat/global memory; operand 2 is the packed
; {swap,compare} pair in <x2>mode.  "delayeduse" keeps the flat/global
; result register from being reused too soon.
(define_insn "sync_compare_and_swap<mode>_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"	    "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))]
  ""
  "@
   s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")
   (set_attr "delayeduse" "*,yes,yes")])

; Compare-and-swap on LDS memory via ds_cmpst_rtn, which takes the swap
; and compare values as separate operands.
(define_insn "sync_compare_and_swap<mode>_lds_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "= v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand"   "+RL")]
	  UNSPECV_ATOMIC))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " v")
	   (match_operand:SIDI 3 "register_operand" " v")]
	  UNSPECV_ATOMIC))]
  ""
  "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
|
|
1666
|
|
; Atomic load.  Operand 2 is the memory model; the cache-control code
; emitted around the load follows the acquire/release scheme described
; in the section comment above (invalidate after acquire; for seq-cst,
; also write back before).  Alternatives: scalar (RS, constant cache),
; flat (RF, L1) and global (RM, L1).
(define_insn "atomic_load<mode>"
  [(set (match_operand:SIDI 0 "register_operand"      "=Sm, v, v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand"     " RS,RF,RM")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand"     "  i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	/* Plain glc load; just wait for completion.  */
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	/* Acquire: invalidate the cache after the load completes.  */
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
		   "buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	/* Seq-cst: write back before the load and invalidate after.  */
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])
|
|
1726
|
|
1727 (define_insn "atomic_store<mode>"
|
|
1728 [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
|
|
1729 (unspec_volatile:SIDI
|
|
1730 [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
|
|
1731 UNSPECV_ATOMIC))
|
|
1732 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
|
|
1733 ""
|
|
1734 {
|
|
1735 switch (INTVAL (operands[2]))
|
|
1736 {
|
|
1737 case MEMMODEL_RELAXED:
|
|
1738 switch (which_alternative)
|
|
1739 {
|
|
1740 case 0:
|
|
1741 return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
|
|
1742 case 1:
|
|
1743 return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
|
|
1744 case 2:
|
|
1745 return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
|
|
1746 }
|
|
1747 break;
|
|
1748 case MEMMODEL_RELEASE:
|
|
1749 case MEMMODEL_SYNC_RELEASE:
|
|
1750 switch (which_alternative)
|
|
1751 {
|
|
1752 case 0:
|
|
1753 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
|
|
1754 case 1:
|
|
1755 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
|
|
1756 case 2:
|
|
1757 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
|
|
1758 }
|
|
1759 break;
|
|
1760 case MEMMODEL_ACQ_REL:
|
|
1761 case MEMMODEL_SEQ_CST:
|
|
1762 case MEMMODEL_SYNC_SEQ_CST:
|
|
1763 switch (which_alternative)
|
|
1764 {
|
|
1765 case 0:
|
|
1766 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
|
|
1767 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
|
|
1768 case 1:
|
|
1769 return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
|
|
1770 "s_waitcnt\t0\;buffer_wbinvl1_vol";
|
|
1771 case 2:
|
|
1772 return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
|
|
1773 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
|
1774 }
|
|
1775 break;
|
|
1776 }
|
|
1777 gcc_unreachable ();
|
|
1778 }
|
|
1779 [(set_attr "type" "smem,flat,flat")
|
|
1780 (set_attr "length" "20")
|
|
1781 (set_attr "gcn_version" "gcn5,*,gcn5")])
|
|
1782
|
|
1783 (define_insn "atomic_exchange<mode>"
|
|
1784 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
|
|
1785 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
|
|
1786 (set (match_dup 1)
|
|
1787 (unspec_volatile:SIDI
|
|
1788 [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
|
|
1789 UNSPECV_ATOMIC))
|
|
1790 (use (match_operand 3 "immediate_operand"))]
|
|
1791 ""
|
|
1792 {
|
|
1793 switch (INTVAL (operands[3]))
|
|
1794 {
|
|
1795 case MEMMODEL_RELAXED:
|
|
1796 switch (which_alternative)
|
|
1797 {
|
|
1798 case 0:
|
|
1799 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
|
|
1800 case 1:
|
|
1801 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
|
|
1802 case 2:
|
|
1803 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
|
1804 "s_waitcnt\tvmcnt(0)";
|
|
1805 }
|
|
1806 break;
|
|
1807 case MEMMODEL_CONSUME:
|
|
1808 case MEMMODEL_ACQUIRE:
|
|
1809 case MEMMODEL_SYNC_ACQUIRE:
|
|
1810 switch (which_alternative)
|
|
1811 {
|
|
1812 case 0:
|
|
1813 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
|
|
1814 "s_dcache_wb_vol\;s_dcache_inv_vol";
|
|
1815 case 1:
|
|
1816 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
|
|
1817 "buffer_wbinvl1_vol";
|
|
1818 case 2:
|
|
1819 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
|
1820 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
|
1821 }
|
|
1822 break;
|
|
1823 case MEMMODEL_RELEASE:
|
|
1824 case MEMMODEL_SYNC_RELEASE:
|
|
1825 switch (which_alternative)
|
|
1826 {
|
|
1827 case 0:
|
|
1828 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
|
|
1829 "s_waitcnt\tlgkmcnt(0)";
|
|
1830 case 1:
|
|
1831 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
|
1832 "s_waitcnt\t0";
|
|
1833 case 2:
|
|
1834 return "buffer_wbinvl1_vol\;"
|
|
1835 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
|
1836 "s_waitcnt\tvmcnt(0)";
|
|
1837 }
|
|
1838 break;
|
|
1839 case MEMMODEL_ACQ_REL:
|
|
1840 case MEMMODEL_SEQ_CST:
|
|
1841 case MEMMODEL_SYNC_SEQ_CST:
|
|
1842 switch (which_alternative)
|
|
1843 {
|
|
1844 case 0:
|
|
1845 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
|
|
1846 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
|
|
1847 case 1:
|
|
1848 return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
|
1849 "s_waitcnt\t0\;buffer_wbinvl1_vol";
|
|
1850 case 2:
|
|
1851 return "buffer_wbinvl1_vol\;"
|
|
1852 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
|
1853 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
|
1854 }
|
|
1855 break;
|
|
1856 }
|
|
1857 gcc_unreachable ();
|
|
1858 }
|
|
1859 [(set_attr "type" "smem,flat,flat")
|
|
1860 (set_attr "length" "20")
|
|
1861 (set_attr "gcn_version" "gcn5,*,gcn5")])
|
|
1862
|
|
1863 ;; }}}
|
|
1864 ;; {{{ OpenACC / OpenMP
|
|
1865
|
|
1866 (define_expand "oacc_dim_size"
|
|
1867 [(match_operand:SI 0 "register_operand")
|
|
1868 (match_operand:SI 1 "const_int_operand")]
|
|
1869 ""
|
|
1870 {
|
|
1871 rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
|
|
1872 emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
|
|
1873 DONE;
|
|
1874 })
|
|
1875
|
|
1876 (define_expand "oacc_dim_pos"
|
|
1877 [(match_operand:SI 0 "register_operand")
|
|
1878 (match_operand:SI 1 "const_int_operand")]
|
|
1879 ""
|
|
1880 {
|
|
1881 emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
|
|
1882 DONE;
|
|
1883 })
|
|
1884
|
|
1885 (define_expand "gcn_wavefront_barrier"
|
|
1886 [(set (match_dup 0)
|
|
1887 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
|
|
1888 ""
|
|
1889 {
|
|
1890 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
|
|
1891 MEM_VOLATILE_P (operands[0]) = 1;
|
|
1892 })
|
|
1893
|
|
1894 (define_insn "*gcn_wavefront_barrier"
|
|
1895 [(set (match_operand:BLK 0 "")
|
|
1896 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
|
|
1897 ""
|
|
1898 "s_barrier"
|
|
1899 [(set_attr "type" "sopp")])
|
|
1900
|
|
1901 (define_expand "oacc_fork"
|
|
1902 [(set (match_operand:SI 0 "")
|
|
1903 (match_operand:SI 1 ""))
|
|
1904 (use (match_operand:SI 2 ""))]
|
|
1905 ""
|
|
1906 {
|
|
1907 /* We need to have oacc_fork/oacc_join named patterns as a pair,
|
|
1908 but the fork isn't actually used. */
|
|
1909 gcc_unreachable ();
|
|
1910 })
|
|
1911
|
|
1912 (define_expand "oacc_join"
|
|
1913 [(set (match_operand:SI 0 "")
|
|
1914 (match_operand:SI 1 ""))
|
|
1915 (use (match_operand:SI 2 ""))]
|
|
1916 ""
|
|
1917 {
|
|
1918 emit_insn (gen_gcn_wavefront_barrier ());
|
|
1919 DONE;
|
|
1920 })
|
|
1921
|
|
1922 ;; }}}
|
|
1923
|
|
1924 (include "gcn-valu.md")
|