111
|
1 ;; Machine description for NVPTX.
|
131
|
2 ;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
|
111
|
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
|
|
4 ;;
|
|
5 ;; This file is part of GCC.
|
|
6 ;;
|
|
7 ;; GCC is free software; you can redistribute it and/or modify
|
|
8 ;; it under the terms of the GNU General Public License as published by
|
|
9 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
10 ;; any later version.
|
|
11 ;;
|
|
12 ;; GCC is distributed in the hope that it will be useful,
|
|
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 ;; GNU General Public License for more details.
|
|
16 ;;
|
|
17 ;; You should have received a copy of the GNU General Public License
|
|
18 ;; along with GCC; see the file COPYING3. If not see
|
|
19 ;; <http://www.gnu.org/licenses/>.
|
|
20
|
|
;; C enumeration "unspec": the UNSPEC_* codes.  In the part of the file
;; visible here, UNSPEC_ARG_REG, UNSPEC_COPYSIGN, UNSPEC_LOG2, UNSPEC_EXP2,
;; UNSPEC_SIN, UNSPEC_COS, UNSPEC_BITREV and UNSPEC_BR_UNIFIED are referenced
;; by patterns; the remaining codes are presumably used further on.
21 (define_c_enum "unspec" [
|
|
22 UNSPEC_ARG_REG
|
|
23
|
|
24 UNSPEC_COPYSIGN
|
|
25 UNSPEC_LOG2
|
|
26 UNSPEC_EXP2
|
|
27 UNSPEC_SIN
|
|
28 UNSPEC_COS
|
|
29
|
|
30 UNSPEC_FPINT_FLOOR
|
|
31 UNSPEC_FPINT_BTRUNC
|
|
32 UNSPEC_FPINT_CEIL
|
|
33 UNSPEC_FPINT_NEARBYINT
|
|
34
|
|
35 UNSPEC_BITREV
|
|
36
|
|
37 UNSPEC_ALLOCA
|
|
38
|
|
39 UNSPEC_SET_SOFTSTACK
|
|
40
|
|
41 UNSPEC_DIM_SIZE
|
|
42
|
|
43 UNSPEC_BIT_CONV
|
|
44
|
|
45 UNSPEC_VOTE_BALLOT
|
|
46
|
|
47 UNSPEC_LANEID
|
|
48
|
|
49 UNSPEC_SHUFFLE
|
|
50 UNSPEC_BR_UNIFIED
|
|
51 ])
|
|
52
|
|
;; C enumeration "unspecv": the UNSPECV_* codes.  None of them is referenced
;; by the patterns visible in this part of the file.
53 (define_c_enum "unspecv" [
|
|
54 UNSPECV_LOCK
|
|
55 UNSPECV_CAS
|
|
56 UNSPECV_XCHG
|
|
57 UNSPECV_BARSYNC
|
131
|
58 UNSPECV_MEMBAR
|
|
59 UNSPECV_MEMBAR_CTA
|
111
|
60 UNSPECV_DIM_POS
|
|
61
|
|
62 UNSPECV_FORK
|
|
63 UNSPECV_FORKED
|
|
64 UNSPECV_JOINING
|
|
65 UNSPECV_JOIN
|
|
66
|
|
67 UNSPECV_NOUNROLL
|
|
68
|
|
69 UNSPECV_SIMT_ENTER
|
|
70 UNSPECV_SIMT_EXIT
|
|
71 ])
|
|
72
|
|
;; Insn attribute "subregs_ok" with values false/true, defaulting to "false".
;; The move, extension and truncation patterns below set it to "true".
73 (define_attr "subregs_ok" "false,true"
|
|
74 (const_string "false"))
|
|
75
|
|
;; Insn attribute "atomic" with values false/true, defaulting to "false".
;; No pattern in the visible part of the file overrides the default.
76 (define_attr "atomic" "false,true"
|
|
77 (const_string "false"))
|
|
78
|
|
79 ;; The nvptx operand predicates, in general, don't permit subregs and
|
|
80 ;; accept only literal constants; this differs from the generic ones, which
|
|
81 ;; permit subregs and symbolic constants (as appropriate).
|
|
;; Accepts only rtx code REG, and then additionally requires the generic
;; register_operand check to pass for the requested mode.
82 (define_predicate "nvptx_register_operand"
|
|
83 (match_code "reg")
|
|
84 {
|
|
85 return register_operand (op, mode);
|
|
86 })
|
|
87
|
|
;; Accepts rtx codes MEM and REG only.  A REG must satisfy register_operand;
;; anything else (i.e. a MEM) must satisfy memory_operand.
88 (define_predicate "nvptx_nonimmediate_operand"
|
|
89 (match_code "mem,reg")
|
|
90 {
|
|
91 return (REG_P (op) ? register_operand (op, mode)
|
|
92 : memory_operand (op, mode));
|
|
93 })
|
|
94
|
|
;; Accepts rtx codes REG, CONST_INT and CONST_DOUBLE only.  A REG must
;; satisfy register_operand; a constant must satisfy immediate_operand.
95 (define_predicate "nvptx_nonmemory_operand"
|
|
96 (match_code "reg,const_int,const_double")
|
|
97 {
|
|
98 return (REG_P (op) ? register_operand (op, mode)
|
|
99 : immediate_operand (op, mode));
|
|
100 })
|
|
101
|
|
;; Accepts a CONST_INT that is exactly the shared const0_rtx object
;; (pointer comparison), i.e. integer zero.
102 (define_predicate "const0_operand"
|
|
103 (and (match_code "const_int")
|
|
104 (match_test "op == const0_rtx")))
|
|
105
|
|
106 ;; True if this operator is valid for predication.
|
|
;; Accepts the rtx codes EQ and NE only.
107 (define_predicate "predicate_operator"
|
|
108 (match_code "eq,ne"))
|
|
109
|
|
;; Accepts the rtx code NE only; used by the "cstorebi4" expander below.
110 (define_predicate "ne_operator"
|
|
111 (match_code "ne"))
|
|
112
|
|
;; Accepts the ten signed and unsigned comparison codes listed below; the
;; patterns in this file pair it with integer-mode (HSDIM) operands.
113 (define_predicate "nvptx_comparison_operator"
|
|
114 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
|
|
115
|
|
;; Accepts the ordered, un-prefixed (UNEQ, UNLE, ...) and ORDERED/UNORDERED
;; comparison codes listed below; the patterns in this file pair it with
;; floating-point-mode (SDFM) operands.
116 (define_predicate "nvptx_float_comparison_operator"
|
|
117 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
|
|
118
|
|
119 ;; Test for a valid operand for a call instruction.
|
|
;; Accepts a REG unconditionally, or a SYMBOL_REF for which
;; SYMBOL_REF_FUNCTION_P holds; other SYMBOL_REFs are rejected.
120 (define_predicate "call_insn_operand"
|
|
121 (match_code "symbol_ref,reg")
|
|
122 {
|
|
123 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
|
|
124 })
|
|
125
|
|
126 ;; Return true if OP is a call with parallel USEs of the argument
|
|
127 ;; pseudos.
|
|
;; Accepts a PARALLEL in which every element from index 1 onwards is a USE
;; whose operand is a REG.  Element 0 is not inspected here; the call
;; patterns that use this predicate match it themselves.
128 (define_predicate "call_operation"
|
|
129 (match_code "parallel")
|
|
130 {
|
|
131 int arg_end = XVECLEN (op, 0);
|
|
132
|
|
133 for (int i = 1; i < arg_end; i++)
|
|
134 {
|
|
135 rtx elt = XVECEXP (op, 0, i);
|
|
136
|
|
137 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
|
|
138 return false;
|
|
139 }
|
|
140 return true;
|
|
141 })
|
|
142
|
|
;; Insn attribute "predicable" with values false/true, defaulting to "true".
;; The conditional branch patterns below explicitly set it to "false".
143 (define_attr "predicable" "false,true"
|
|
144 (const_string "true"))
|
|
145
|
|
;; Conditional-execution template: the condition is an EQ/NE comparison
;; (predicate_operator) of a BImode register against constant zero.
;; Both the insn condition and the output prefix string are empty.
146 (define_cond_exec
|
|
147 [(match_operator 0 "predicate_operator"
|
|
148 [(match_operand:BI 1 "nvptx_register_operand" "")
|
|
149 (match_operand:BI 2 "const0_operand" "")])]
|
|
150 ""
|
|
151 ""
|
|
152 )
|
|
153
|
|
;; Constraint "P0": a CONST_INT whose value is 0.
154 (define_constraint "P0"
|
|
155 "An integer with the value 0."
|
|
156 (and (match_code "const_int")
|
|
157 (match_test "ival == 0")))
|
|
158
|
|
;; Constraint "P1": a CONST_INT whose value is 1.
159 (define_constraint "P1"
|
|
160 "An integer with the value 1."
|
|
161 (and (match_code "const_int")
|
|
162 (match_test "ival == 1")))
|
|
163
|
|
;; Constraint "Pn": a CONST_INT whose value is -1.
164 (define_constraint "Pn"
|
|
165 "An integer with the value -1."
|
|
166 (and (match_code "const_int")
|
|
167 (match_test "ival == -1")))
|
|
168
|
|
;; Constraint "R": matches any rtx with code REG.  The test itself does not
;; distinguish pseudo from hard registers, despite the description string.
169 (define_constraint "R"
|
|
170 "A pseudo register."
|
|
171 (match_code "reg"))
|
|
172
|
|
;; Constraint "Ia": matches every CONST_INT (the extra test is always true).
173 (define_constraint "Ia"
|
|
174 "Any integer constant."
|
|
175 (and (match_code "const_int") (match_test "true")))
|
|
176
|
|
;; Mode iterators used to stamp out one pattern per machine mode.  Each name
;; spells out its member modes: Q/H/S/D = QI/HI/SI/DI integer modes, B = BI,
;; SDFM = SF/DF, SDCM = SC/DC, BITS/BITD pair an integer mode with the float
;; mode written next to it, and VECIM covers the V2SI/V2DI vector modes.
177 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
|
|
178 (define_mode_iterator QHSDIM [QI HI SI DI])
|
|
179 (define_mode_iterator HSDIM [HI SI DI])
|
|
180 (define_mode_iterator BHSDIM [BI HI SI DI])
|
|
181 (define_mode_iterator SDIM [SI DI])
|
|
182 (define_mode_iterator SDISDFM [SI DI SF DF])
|
|
183 (define_mode_iterator QHIM [QI HI])
|
|
184 (define_mode_iterator QHSIM [QI HI SI])
|
|
185 (define_mode_iterator SDFM [SF DF])
|
|
186 (define_mode_iterator SDCM [SC DC])
|
|
187 (define_mode_iterator BITS [SI SF])
|
|
188 (define_mode_iterator BITD [DI DF])
|
|
189 (define_mode_iterator VECIM [V2SI V2DI])
|
|
190
|
|
191 ;; This mode iterator allows :P to be used for patterns that operate on
|
|
192 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
|
|
;; Iterator "P": SI is enabled when Pmode == SImode, DI when Pmode == DImode.
193 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
|
|
194
|
|
195 ;; We should get away with not defining memory alternatives, since we don't
|
|
196 ;; get variables in this mode and pseudos are never spilled.
|
|
;; BImode move into a register, three alternatives:
;;   0: register source      -> "mov"
;;   1: constant 0  (P0)     -> "setp.eq.u32 %0, 1, 0"  (compares 1 with 0)
;;   2: constant -1 (Pn)     -> "setp.eq.u32 %0, 1, 1"  (compares 1 with 1)
;; The %-escapes are expanded by the back end's operand printer (not shown).
197 (define_insn "movbi"
|
|
198 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
|
|
199 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
|
|
200 ""
|
|
201 "@
|
|
202 %.\\tmov%t0\\t%0, %1;
|
|
203 %.\\tsetp.eq.u32\\t%0, 1, 0;
|
|
204 %.\\tsetp.eq.u32\\t%0, 1, 1;")
|
|
205
|
|
;; Vector (V2SI/V2DI) move.  The insn condition rejects a memory destination
;; unless the source is a register.  Alternative 1 (memory source) returns
;; the "ld" template, alternative 2 (memory destination) the "st" template,
;; and alternative 0 (register or immediate source) is delegated to
;; nvptx_output_mov_insn.  Marks subregs_ok as true.
206 (define_insn "*mov<mode>_insn"
|
|
207 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
|
|
208 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
|
|
209 "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
210 {
|
|
211 if (which_alternative == 1)
|
|
212 return "%.\\tld%A1%u1\\t%0, %1;";
|
|
213 if (which_alternative == 2)
|
|
214 return "%.\\tst%A0%u0\\t%0, %1;";
|
|
215
|
|
216 return nvptx_output_mov_insn (operands[0], operands[1]);
|
|
217 }
|
|
218 [(set_attr "subregs_ok" "true")])
|
|
219
|
|
;; Scalar integer (QI/HI/SI/DI) move.  Same structure as the vector move:
;; no memory destination unless the source is a register; alternative 1
;; returns the "ld" template, alternative 2 the "st" template, alternative 0
;; is delegated to nvptx_output_mov_insn.  Marks subregs_ok as true.
220 (define_insn "*mov<mode>_insn"
|
|
221 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
|
|
222 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
|
|
223 "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
224 {
|
|
225 if (which_alternative == 1)
|
|
226 return "%.\\tld%A1%u1\\t%0, %1;";
|
|
227 if (which_alternative == 2)
|
|
228 return "%.\\tst%A0%u0\\t%0, %1;";
|
|
229
|
|
230 return nvptx_output_mov_insn (operands[0], operands[1]);
|
|
231 }
|
|
232 [(set_attr "subregs_ok" "true")])
|
|
233
|
|
;; Floating-point (SF/DF) move; the immediate alternative uses "F" rather
;; than "i".  Alternative 1 returns the "ld" template, alternative 2 the
;; "st" template, alternative 0 is delegated to nvptx_output_mov_insn.
;; NOTE(review): unlike the integer variants, the %u escape here names the
;; register operand (%u0 on load, %u1 on store) rather than the memory
;; operand; both operands share one mode, so this presumably prints the same
;; suffix -- confirm against the operand printer.
234 (define_insn "*mov<mode>_insn"
|
|
235 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
|
|
236 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
|
|
237 "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
238 {
|
|
239 if (which_alternative == 1)
|
|
240 return "%.\\tld%A1%u0\\t%0, %1;";
|
|
241 if (which_alternative == 2)
|
|
242 return "%.\\tst%A0%u1\\t%0, %1;";
|
|
243
|
|
244 return nvptx_output_mov_insn (operands[0], operands[1]);
|
|
245 }
|
|
246 [(set_attr "subregs_ok" "true")])
|
|
247
|
|
;; Load a QI/HI register from the argument register "%ar<N>", where N is the
;; constant integer operand 1.  Emits a "cvt" whose source type is .u32.
248 (define_insn "load_arg_reg<mode>"
|
|
249 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
|
|
250 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
|
|
251 UNSPEC_ARG_REG))]
|
|
252 ""
|
|
253 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
|
|
254
|
|
;; Load an SI/DI/SF/DF register from the argument register "%ar<N>", where N
;; is the constant integer operand 1.  Emits a plain "mov".
255 (define_insn "load_arg_reg<mode>"
|
|
256 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
|
|
257 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
|
|
258 UNSPEC_ARG_REG))]
|
|
259 ""
|
|
260 "%.\\tmov%t0\\t%0, %%ar%1;")
|
|
261
|
|
;; Vector move expander.  When the destination is memory and the source is
;; not a register, the source is first moved into a fresh pseudo and that
;; pseudo is then stored.  All other cases fall through to the default
;; expansion of the template.
262 (define_expand "mov<mode>"
|
|
263 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
|
|
264 (match_operand:VECIM 1 "general_operand" ""))]
|
|
265 ""
|
|
266 {
|
|
267 if (MEM_P (operands[0]) && !REG_P (operands[1]))
|
|
268 {
|
|
269 rtx tmp = gen_reg_rtx (<MODE>mode);
|
|
270 emit_move_insn (tmp, operands[1]);
|
|
271 emit_move_insn (operands[0], tmp);
|
|
272 DONE;
|
|
273 }
|
|
274 })
|
|
275
|
|
;; Scalar move expander (QI/HI/SI/DI/SF/DF).  A non-register source headed
;; for memory is staged through a fresh pseudo.  Otherwise, a LABEL_REF
;; source triggers the "sorry" diagnostic before the default expansion.
276 (define_expand "mov<mode>"
|
|
277 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
|
|
278 (match_operand:QHSDISDFM 1 "general_operand" ""))]
|
|
279 ""
|
|
280 {
|
|
281 if (MEM_P (operands[0]) && !REG_P (operands[1]))
|
|
282 {
|
|
283 rtx tmp = gen_reg_rtx (<MODE>mode);
|
|
284 emit_move_insn (tmp, operands[1]);
|
|
285 emit_move_insn (operands[0], tmp);
|
|
286 DONE;
|
|
287 }
|
|
288
|
|
289 if (GET_CODE (operands[1]) == LABEL_REF)
|
|
290 sorry ("target cannot support label values");
|
|
291 })
|
|
292
|
|
;; Zero-extend QI to HI.  Register source: "cvt.u16.u<size>"; memory source:
;; an "ld" with the fixed ".u8" type.  Marks subregs_ok as true.
293 (define_insn "zero_extendqihi2"
|
|
294 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
|
|
295 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
|
|
296 ""
|
|
297 "@
|
|
298 %.\\tcvt.u16.u%T1\\t%0, %1;
|
|
299 %.\\tld%A1.u8\\t%0, %1;"
|
|
300 [(set_attr "subregs_ok" "true")])
|
|
301
|
|
;; Zero-extend QI/HI to SI.  Register source: "cvt.u32.u<size>"; memory
;; source: an unsigned "ld" of the source size.  Marks subregs_ok as true.
302 (define_insn "zero_extend<mode>si2"
|
|
303 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
|
|
304 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
|
|
305 ""
|
|
306 "@
|
|
307 %.\\tcvt.u32.u%T1\\t%0, %1;
|
|
308 %.\\tld%A1.u%T1\\t%0, %1;"
|
|
309 [(set_attr "subregs_ok" "true")])
|
|
310
|
|
;; Zero-extend QI/HI/SI to DI.  Register source: "cvt.u64.u<size>"; memory
;; source: an "ld" whose type suffix comes from the %u1 escape.
;; Marks subregs_ok as true.
311 (define_insn "zero_extend<mode>di2"
|
|
312 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
|
|
313 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
|
|
314 ""
|
|
315 "@
|
|
316 %.\\tcvt.u64.u%T1\\t%0, %1;
|
|
317 %.\\tld%A1%u1\\t%0, %1;"
|
|
318 [(set_attr "subregs_ok" "true")])
|
|
319
|
|
;; Sign-extend QI/HI to SI.  Register source: "cvt.s32.s<size>"; memory
;; source: a signed "ld" of the source size.  Marks subregs_ok as true.
320 (define_insn "extend<mode>si2"
|
|
321 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
|
|
322 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
|
|
323 ""
|
|
324 "@
|
|
325 %.\\tcvt.s32.s%T1\\t%0, %1;
|
|
326 %.\\tld%A1.s%T1\\t%0, %1;"
|
|
327 [(set_attr "subregs_ok" "true")])
|
|
328
|
|
;; Sign-extend QI/HI/SI to DI.  Register source: "cvt.s64.s<size>"; memory
;; source: a signed "ld" of the source size.  Marks subregs_ok as true.
329 (define_insn "extend<mode>di2"
|
|
330 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
|
|
331 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
|
|
332 ""
|
|
333 "@
|
|
334 %.\\tcvt.s64.s%T1\\t%0, %1;
|
|
335 %.\\tld%A1.s%T1\\t%0, %1;"
|
|
336 [(set_attr "subregs_ok" "true")])
|
|
337
|
|
;; Truncate HI to QI.  Register destination: "cvt" from .u16; memory
;; destination: an "st" with the fixed ".u8" type.  Marks subregs_ok as true.
338 (define_insn "trunchiqi2"
|
|
339 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
|
|
340 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
|
|
341 ""
|
|
342 "@
|
|
343 %.\\tcvt%t0.u16\\t%0, %1;
|
|
344 %.\\tst%A0.u8\\t%0, %1;"
|
|
345 [(set_attr "subregs_ok" "true")])
|
|
346
|
|
;; Truncate SI to QI/HI.  Register destination: "cvt" from .u32; memory
;; destination: an unsigned "st" of the destination size.
;; Marks subregs_ok as true.
347 (define_insn "truncsi<mode>2"
|
|
348 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
|
|
349 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
|
|
350 ""
|
|
351 "@
|
|
352 %.\\tcvt%t0.u32\\t%0, %1;
|
|
353 %.\\tst%A0.u%T0\\t%0, %1;"
|
|
354 [(set_attr "subregs_ok" "true")])
|
|
355
|
|
;; Truncate DI to QI/HI/SI.  Register destination: "cvt" from .u64; memory
;; destination: an unsigned "st" of the destination size.
;; Marks subregs_ok as true.
356 (define_insn "truncdi<mode>2"
|
|
357 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
|
|
358 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
|
|
359 ""
|
|
360 "@
|
|
361 %.\\tcvt%t0.u64\\t%0, %1;
|
|
362 %.\\tst%A0.u%T0\\t%0, %1;"
|
|
363 [(set_attr "subregs_ok" "true")])
|
|
364
|
|
365 ;; Integer arithmetic
|
|
366
|
|
;; Integer addition (HI/SI/DI): register + register-or-immediate -> "add".
367 (define_insn "add<mode>3"
|
|
368 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
369 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
370 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
371 ""
|
|
372 "%.\\tadd%t0\\t%0, %1, %2;")
|
|
373
|
|
;; Integer subtraction (HI/SI/DI) -> "sub".  Both inputs must be registers;
;; unlike addition, no immediate is accepted for operand 2.
374 (define_insn "sub<mode>3"
|
|
375 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
376 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
377 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
|
|
378 ""
|
|
379 "%.\\tsub%t0\\t%0, %1, %2;")
|
|
380
|
|
;; Integer multiplication (HI/SI/DI) -> "mul.lo".
381 (define_insn "mul<mode>3"
|
|
382 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
383 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
384 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
385 ""
|
|
386 "%.\\tmul.lo%t0\\t%0, %1, %2;")
|
|
387
|
|
;; Integer multiply-add (HI/SI/DI): (op1 * op2) + op3 -> "mad.lo".
;; Anonymous pattern ("*" prefix), so no gen_ function is created for it.
388 (define_insn "*mad<mode>3"
|
|
389 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
390 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
391 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
|
|
392 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
|
|
393 ""
|
|
394 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
|
|
395
|
|
;; Signed integer division (HI/SI/DI) -> "div.s<size>".
396 (define_insn "div<mode>3"
|
|
397 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
398 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
399 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
400 ""
|
|
401 "%.\\tdiv.s%T0\\t%0, %1, %2;")
|
|
402
|
|
;; Unsigned integer division (HI/SI/DI) -> "div.u<size>".
403 (define_insn "udiv<mode>3"
|
|
404 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
405 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
406 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
407 ""
|
|
408 "%.\\tdiv.u%T0\\t%0, %1, %2;")
|
|
409
|
|
;; Signed integer remainder (HI/SI/DI) -> "rem.s<size>".
;; Operand 1 uses the register-only predicate, so its constraint is plain
;; "R", matching the sibling div/udiv patterns (an "i" alternative could
;; never be selected because the predicate rejects constants).
410 (define_insn "mod<mode>3"
|
|
411 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
412 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
413 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
414 ""
|
|
415 "%.\\trem.s%T0\\t%0, %1, %2;")
|
|
416
|
|
;; Unsigned integer remainder (HI/SI/DI) -> "rem.u<size>".
;; Operand 1 uses the register-only predicate, so its constraint is plain
;; "R", matching the sibling div/udiv patterns (an "i" alternative could
;; never be selected because the predicate rejects constants).
417 (define_insn "umod<mode>3"
|
|
418 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
419 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
420 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
421 ""
|
|
422 "%.\\trem.u%T0\\t%0, %1, %2;")
|
|
423
|
|
;; Signed integer minimum (HI/SI/DI) -> "min.s<size>".
424 (define_insn "smin<mode>3"
|
|
425 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
426 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
427 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
428 ""
|
|
429 "%.\\tmin.s%T0\\t%0, %1, %2;")
|
|
430
|
|
;; Unsigned integer minimum (HI/SI/DI) -> "min.u<size>".
431 (define_insn "umin<mode>3"
|
|
432 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
433 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
434 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
435 ""
|
|
436 "%.\\tmin.u%T0\\t%0, %1, %2;")
|
|
437
|
|
;; Signed integer maximum (HI/SI/DI) -> "max.s<size>".
438 (define_insn "smax<mode>3"
|
|
439 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
440 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
441 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
442 ""
|
|
443 "%.\\tmax.s%T0\\t%0, %1, %2;")
|
|
444
|
|
;; Unsigned integer maximum (HI/SI/DI) -> "max.u<size>".
445 (define_insn "umax<mode>3"
|
|
446 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
447 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
|
448 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
449 ""
|
|
450 "%.\\tmax.u%T0\\t%0, %1, %2;")
|
|
451
|
|
;; Integer absolute value (HI/SI/DI) -> "abs.s<size>".
452 (define_insn "abs<mode>2"
|
|
453 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
454 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
|
|
455 ""
|
|
456 "%.\\tabs.s%T0\\t%0, %1;")
|
|
457
|
|
;; Integer negation (HI/SI/DI) -> "neg.s<size>".
458 (define_insn "neg<mode>2"
|
|
459 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
460 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
|
|
461 ""
|
|
462 "%.\\tneg.s%T0\\t%0, %1;")
|
|
463
|
|
;; Bitwise complement (HI/SI/DI) -> "not.b<size>".
464 (define_insn "one_cmpl<mode>2"
|
|
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
466 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
|
|
467 ""
|
|
468 "%.\\tnot.b%T0\\t%0, %1;")
|
|
469
|
|
;; Bit reversal (SI/DI), modelled as UNSPEC_BITREV -> "brev.b<size>".
;; Used by the "ctz<mode>2" expander below.
470 (define_insn "bitrev<mode>2"
|
|
471 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
|
|
472 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
|
|
473 UNSPEC_BITREV))]
|
|
474 ""
|
|
475 "%.\\tbrev.b%T0\\t%0, %1;")
|
|
476
|
|
;; Count leading zeros of an SI/DI register -> "clz.b<size>".  The result is
;; always SImode; the size suffix is taken from the source operand (%T1).
477 (define_insn "clz<mode>2"
|
|
478 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
|
479 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
|
|
480 ""
|
|
481 "%.\\tclz.b%T1\\t%0, %1;")
|
|
482
|
|
;; Count trailing zeros (SI/DI source, SI result), synthesised as a bit
;; reversal into a fresh pseudo followed by a count-leading-zeros of it.
483 (define_expand "ctz<mode>2"
|
|
484 [(set (match_operand:SI 0 "nvptx_register_operand" "")
|
|
485 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
|
|
486 ""
|
|
487 {
|
|
488 rtx tmpreg = gen_reg_rtx (<MODE>mode);
|
|
489 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
|
|
490 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
|
|
491 DONE;
|
|
492 })
|
|
493
|
|
494 ;; Shifts
|
|
495
|
|
;; Left shift (SI/DI) -> "shl.b<size>".  The shift count is always SImode.
496 (define_insn "ashl<mode>3"
|
|
497 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
|
|
498 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
|
|
499 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
500 ""
|
|
501 "%.\\tshl.b%T0\\t%0, %1, %2;")
|
|
502
|
|
;; Arithmetic right shift (SI/DI) -> "shr.s<size>".  Count is SImode.
503 (define_insn "ashr<mode>3"
|
|
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
|
|
505 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
|
|
506 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
507 ""
|
|
508 "%.\\tshr.s%T0\\t%0, %1, %2;")
|
|
509
|
|
;; Logical right shift (SI/DI) -> "shr.u<size>".  Count is SImode.
510 (define_insn "lshr<mode>3"
|
|
511 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
|
|
512 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
|
|
513 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
514 ""
|
|
515 "%.\\tshr.u%T0\\t%0, %1, %2;")
|
|
516
|
|
517 ;; Logical operations
|
|
518
|
|
;; Bitwise AND for BI/HI/SI/DI -> "and.b<size>".
519 (define_insn "and<mode>3"
|
|
520 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
|
|
521 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
|
|
522 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
523 ""
|
|
524 "%.\\tand.b%T0\\t%0, %1, %2;")
|
|
525
|
|
;; Bitwise inclusive OR for BI/HI/SI/DI -> "or.b<size>".
526 (define_insn "ior<mode>3"
|
|
527 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
|
|
528 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
|
|
529 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
530 ""
|
|
531 "%.\\tor.b%T0\\t%0, %1, %2;")
|
|
532
|
|
;; Bitwise exclusive OR for BI/HI/SI/DI -> "xor.b<size>".
533 (define_insn "xor<mode>3"
|
|
534 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
|
|
535 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
|
|
536 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
|
|
537 ""
|
|
538 "%.\\txor.b%T0\\t%0, %1, %2;")
|
|
539
|
|
540 ;; Comparisons and branches
|
|
541
|
|
;; Integer comparison (HI/SI/DI operands) producing a BImode register
;; -> "setp"; the comparison suffix is printed from operator 1 via %c1.
542 (define_insn "*cmp<mode>"
|
|
543 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
|
|
544 (match_operator:BI 1 "nvptx_comparison_operator"
|
|
545 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
|
|
546 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
|
|
547 ""
|
|
548 "%.\\tsetp%c1\\t%0, %2, %3;")
|
|
549
|
|
;; Floating-point comparison (SF/DF operands) producing a BImode register
;; -> "setp"; the comparison suffix is printed from operator 1 via %c1.
550 (define_insn "*cmp<mode>"
|
|
551 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
|
|
552 (match_operator:BI 1 "nvptx_float_comparison_operator"
|
|
553 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
|
|
554 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
|
|
555 ""
|
|
556 "%.\\tsetp%c1\\t%0, %2, %3;")
|
|
557
|
|
;; Unconditional jump to the label in operand 0 -> "bra".
558 (define_insn "jump"
|
|
559 [(set (pc)
|
|
560 (label_ref (match_operand 0 "" "")))]
|
|
561 ""
|
|
562 "%.\\tbra\\t%l0;")
|
|
563
|
|
;; Branch to label operand 1 when the BImode register operand 0 is nonzero.
;; The guard is printed with %j0 -> "bra".  Not itself predicable.
564 (define_insn "br_true"
|
|
565 [(set (pc)
|
|
566 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
|
|
567 (const_int 0))
|
|
568 (label_ref (match_operand 1 "" ""))
|
|
569 (pc)))]
|
|
570 ""
|
|
571 "%j0\\tbra\\t%l1;"
|
|
572 [(set_attr "predicable" "false")])
|
|
573
|
|
;; Branch to label operand 1 when the BImode register operand 0 is zero.
;; The guard is printed with %J0 -> "bra".  Not itself predicable.
574 (define_insn "br_false"
|
|
575 [(set (pc)
|
|
576 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
|
|
577 (const_int 0))
|
|
578 (label_ref (match_operand 1 "" ""))
|
|
579 (pc)))]
|
|
580 ""
|
|
581 "%J0\\tbra\\t%l1;"
|
|
582 [(set_attr "predicable" "false")])
|
|
583
|
|
584 ;; unified conditional branch
|
|
;; Like "br_true", but the tested register is wrapped in UNSPEC_BR_UNIFIED
;; and the emitted instruction is "bra.uni".  Not itself predicable.
585 (define_insn "br_true_uni"
|
|
586 [(set (pc) (if_then_else
|
|
587 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
|
|
588 UNSPEC_BR_UNIFIED) (const_int 0))
|
|
589 (label_ref (match_operand 1 "" "")) (pc)))]
|
|
590 ""
|
|
591 "%j0\\tbra.uni\\t%l1;"
|
|
592 [(set_attr "predicable" "false")])
|
|
593
|
|
;; Like "br_false", but the tested register is wrapped in UNSPEC_BR_UNIFIED
;; and the emitted instruction is "bra.uni".  Not itself predicable.
594 (define_insn "br_false_uni"
|
|
595 [(set (pc) (if_then_else
|
|
596 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
|
|
597 UNSPEC_BR_UNIFIED) (const_int 0))
|
|
598 (label_ref (match_operand 1 "" "")) (pc)))]
|
|
599 ""
|
|
600 "%J0\\tbra.uni\\t%l1;"
|
|
601 [(set_attr "predicable" "false")])
|
|
602
|
|
;; Compare-and-branch expander for integer operands (HI/SI/DI).  The
;; comparison rtx is handed to nvptx_expand_compare; operands 0..2 are then
;; replaced by its result and that result's two sub-expressions, so the
;; template is emitted with the rewritten comparison.
;; NOTE(review): presumably nvptx_expand_compare emits the setp and returns
;; a BImode test against zero -- confirm in the back end's C source.
603 (define_expand "cbranch<mode>4"
|
|
604 [(set (pc)
|
|
605 (if_then_else (match_operator 0 "nvptx_comparison_operator"
|
|
606 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
|
|
607 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
|
|
608 (label_ref (match_operand 3 "" ""))
|
|
609 (pc)))]
|
|
610 ""
|
|
611 {
|
|
612 rtx t = nvptx_expand_compare (operands[0]);
|
|
613 operands[0] = t;
|
|
614 operands[1] = XEXP (t, 0);
|
|
615 operands[2] = XEXP (t, 1);
|
|
616 })
|
|
617
|
|
;; Compare-and-branch expander for floating-point operands (SF/DF).  Same
;; scheme as the integer variant: the comparison goes through
;; nvptx_expand_compare and operands 0..2 are rewritten from its result.
618 (define_expand "cbranch<mode>4"
|
|
619 [(set (pc)
|
|
620 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
|
|
621 [(match_operand:SDFM 1 "nvptx_register_operand" "")
|
|
622 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
|
|
623 (label_ref (match_operand 3 "" ""))
|
|
624 (pc)))]
|
|
625 ""
|
|
626 {
|
|
627 rtx t = nvptx_expand_compare (operands[0]);
|
|
628 operands[0] = t;
|
|
629 operands[1] = XEXP (t, 0);
|
|
630 operands[2] = XEXP (t, 1);
|
|
631 })
|
|
632
|
|
;; Compare-and-branch expander for a BImode register tested (EQ/NE) against
;; constant zero.  No preparation code: the template is emitted as written,
;; which has the shape matched by "br_true" / "br_false" above.
633 (define_expand "cbranchbi4"
|
|
634 [(set (pc)
|
|
635 (if_then_else (match_operator 0 "predicate_operator"
|
|
636 [(match_operand:BI 1 "nvptx_register_operand" "")
|
|
637 (match_operand:BI 2 "const0_operand" "")])
|
|
638 (label_ref (match_operand 3 "" ""))
|
|
639 (pc)))]
|
|
640 ""
|
|
641 "")
|
|
642
|
|
643 ;; Conditional stores
|
|
644
|
|
;; Materialise a BImode predicate as an SImode value -> "selp" choosing -1
;; when operand 1 is set and 0 otherwise.
645 (define_insn "setcc_from_bi"
|
|
646 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
|
647 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
|
|
648 (const_int 0)))]
|
|
649 ""
|
|
650 "%.\\tselp%t0 %0,-1,0,%1;")
|
|
651
|
|
;; Integer conditional select (HI/SI/DI): operand 2 if the BImode register
;; operand 1 is nonzero, else operand 3 -> "selp %0, %2, %3, %1".
652 (define_insn "sel_true<mode>"
|
|
653 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
654 (if_then_else:HSDIM
|
|
655 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
|
656 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
|
|
657 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
|
|
658 ""
|
|
659 "%.\\tselp%t0\\t%0, %2, %3, %1;")
|
|
660
|
|
;; Floating-point conditional select (SF/DF): operand 2 if the BImode
;; register operand 1 is nonzero, else operand 3 -> "selp %0, %2, %3, %1".
661 (define_insn "sel_true<mode>"
|
|
662 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
663 (if_then_else:SDFM
|
|
664 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
|
665 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
|
|
666 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
|
|
667 ""
|
|
668 "%.\\tselp%t0\\t%0, %2, %3, %1;")
|
|
669
|
|
;; Integer conditional select on a zero predicate (HI/SI/DI): operand 2 if
;; operand 1 is zero, else operand 3.  Implemented with the same "selp" by
;; swapping the value operands in the template ("%3, %2").
670 (define_insn "sel_false<mode>"
|
|
671 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
|
672 (if_then_else:HSDIM
|
|
673 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
|
674 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
|
|
675 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
|
|
676 ""
|
|
677 "%.\\tselp%t0\\t%0, %3, %2, %1;")
|
|
678
|
|
;; Floating-point conditional select on a zero predicate (SF/DF): operand 2
;; if operand 1 is zero, else operand 3.  Implemented with the same "selp"
;; by swapping the value operands in the template ("%3, %2").
679 (define_insn "sel_false<mode>"
|
|
680 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
681 (if_then_else:SDFM
|
|
682 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
|
|
683 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
|
|
684 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
|
|
685 ""
|
|
686 "%.\\tselp%t0\\t%0, %3, %2, %1;")
|
|
687
|
|
;; Integer comparison (HI/SI/DI operands) whose result is stored directly in
;; an SImode register -> "set" with the destination type and comparison
;; suffix printed by %t0 and %c1.
688 (define_insn "setcc_int<mode>"
|
|
689 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
|
690 (match_operator:SI 1 "nvptx_comparison_operator"
|
|
691 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
|
|
692 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
|
|
693 ""
|
|
694 "%.\\tset%t0%c1\\t%0, %2, %3;")
|
|
695
|
|
;; Floating-point comparison (SF/DF operands) whose result is stored
;; directly in an SImode register -> "set".
696 (define_insn "setcc_int<mode>"
|
|
697 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
|
698 (match_operator:SI 1 "nvptx_float_comparison_operator"
|
|
699 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
|
|
700 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
|
|
701 ""
|
|
702 "%.\\tset%t0%c1\\t%0, %2, %3;")
|
|
703
|
|
;; Integer comparison (HI/SI/DI operands) whose result is stored in an
;; SFmode register -> "set".
704 (define_insn "setcc_float<mode>"
|
|
705 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
|
706 (match_operator:SF 1 "nvptx_comparison_operator"
|
|
707 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
|
|
708 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
|
|
709 ""
|
|
710 "%.\\tset%t0%c1\\t%0, %2, %3;")
|
|
711
|
|
;; Floating-point comparison (SF/DF operands) whose result is stored in an
;; SFmode register -> "set".
712 (define_insn "setcc_float<mode>"
|
|
713 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
|
714 (match_operator:SF 1 "nvptx_float_comparison_operator"
|
|
715 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
|
|
716 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
|
|
717 ""
|
|
718 "%.\\tset%t0%c1\\t%0, %2, %3;")
|
|
719
|
|
;; Store-flag expander for a BImode register tested NE against constant
;; zero; only NE is accepted (ne_operator).  No preparation code: the
;; emitted rtx has the shape matched by "setcc_from_bi" above.
720 (define_expand "cstorebi4"
|
|
721 [(set (match_operand:SI 0 "nvptx_register_operand")
|
|
722 (match_operator:SI 1 "ne_operator"
|
|
723 [(match_operand:BI 2 "nvptx_register_operand")
|
|
724 (match_operand:BI 3 "const0_operand")]))]
|
|
725 ""
|
|
726 "")
|
|
727
|
|
;; Store-flag expander for integer comparisons (HI/SI/DI) into an SImode
;; register.  No preparation code: the emitted rtx has the shape matched by
;; the integer "setcc_int<mode>" insn above.
728 (define_expand "cstore<mode>4"
|
|
729 [(set (match_operand:SI 0 "nvptx_register_operand")
|
|
730 (match_operator:SI 1 "nvptx_comparison_operator"
|
|
731 [(match_operand:HSDIM 2 "nvptx_register_operand")
|
|
732 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
|
|
733 ""
|
|
734 "")
|
|
735
|
|
;; Store-flag expander for floating-point comparisons (SF/DF) into an SImode
;; register.  No preparation code: the emitted rtx has the shape matched by
;; the floating-point "setcc_int<mode>" insn above.
736 (define_expand "cstore<mode>4"
|
|
737 [(set (match_operand:SI 0 "nvptx_register_operand")
|
|
738 (match_operator:SI 1 "nvptx_float_comparison_operator"
|
|
739 [(match_operand:SDFM 2 "nvptx_register_operand")
|
|
740 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
|
|
741 ""
|
|
742 "")
|
|
743
|
|
744 ;; Calls
|
|
745
|
|
;; Call without a result.  The whole insn is a PARALLEL accepted by
;; call_operation (operand 2), whose first element is the CALL of operand 0.
;; Output is produced by nvptx_output_call_insn with a null result.
746 (define_insn "call_insn"
|
|
747 [(match_parallel 2 "call_operation"
|
|
748 [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
|
|
749 (match_operand 1))])]
|
|
750 ""
|
|
751 {
|
|
752 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
|
|
753 })
|
|
754
|
|
;; Call with a result.  The whole insn is a PARALLEL accepted by
;; call_operation (operand 3), whose first element sets register operand 0
;; from the CALL of operand 1.  Output is produced by
;; nvptx_output_call_insn, which receives the result register and callee.
755 (define_insn "call_value_insn"
|
|
756 [(match_parallel 3 "call_operation"
|
|
757 [(set (match_operand 0 "nvptx_register_operand" "=R")
|
|
758 (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
|
|
759 (match_operand 2)))])]
|
|
760 ""
|
|
761 {
|
|
762 return nvptx_output_call_insn (insn, operands[0], operands[1]);
|
|
763 })
|
|
764
|
|
;; Expander for calls without a result: all work is delegated to
;; nvptx_expand_call with a null result rtx, and the template is not emitted.
765 (define_expand "call"
|
|
766 [(match_operand 0 "" "")]
|
|
767 ""
|
|
768 {
|
|
769 nvptx_expand_call (NULL_RTX, operands[0]);
|
|
770 DONE;
|
|
771 })
|
|
772
|
|
;; Expander for calls with a result: all work is delegated to
;; nvptx_expand_call (result = operand 0, callee = operand 1), and the
;; template is not emitted.
773 (define_expand "call_value"
|
|
774 [(match_operand 0 "" "")
|
|
775 (match_operand 1 "" "")]
|
|
776 ""
|
|
777 {
|
|
778 nvptx_expand_call (operands[0], operands[1]);
|
|
779 DONE;
|
|
780 })
|
|
781
|
|
782 ;; Floating point arithmetic.
|
|
783
|
|
;; Floating-point addition (SF/DF): register + register-or-constant -> "add".
784 (define_insn "add<mode>3"
|
|
785 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
786 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
787 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
|
|
788 ""
|
|
789 "%.\\tadd%t0\\t%0, %1, %2;")
|
|
790
|
|
;; Floating-point subtraction (SF/DF) -> "sub".  Both inputs are registers.
791 (define_insn "sub<mode>3"
|
|
792 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
793 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
794 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
|
|
795 ""
|
|
796 "%.\\tsub%t0\\t%0, %1, %2;")
|
|
797
|
|
;; Floating-point multiplication (SF/DF) -> "mul".
798 (define_insn "mul<mode>3"
|
|
799 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
800 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
801 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
|
|
802 ""
|
|
803 "%.\\tmul%t0\\t%0, %1, %2;")
|
|
804
|
|
;; Fused multiply-add (SF/DF): fma (op1, op2, op3) -> "fma".
;; NOTE(review): the %# escape presumably prints a rounding-mode modifier --
;; confirm against the back end's operand printer.
805 (define_insn "fma<mode>4"
|
|
806 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
807 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
|
|
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
|
|
810 ""
|
|
811 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
|
|
812
|
|
;; Floating-point division (SF/DF) -> "div", with the %# modifier escape.
813 (define_insn "div<mode>3"
|
|
814 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
815 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
816 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
|
|
817 ""
|
|
818 "%.\\tdiv%#%t0\\t%0, %1, %2;")
|
|
819
|
|
;; copysign (SF/DF), modelled as UNSPEC_COPYSIGN -> "copysign".  Note that
;; the template prints the inputs in swapped order ("%2, %1").
820 (define_insn "copysign<mode>3"
|
|
821 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
822 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
823 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
|
|
824 UNSPEC_COPYSIGN))]
|
|
825 ""
|
|
826 "%.\\tcopysign%t0\\t%0, %2, %1;")
|
|
827
|
|
;; Floating-point minimum (SF/DF) -> "min".
828 (define_insn "smin<mode>3"
|
|
829 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
830 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
831 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
|
|
832 ""
|
|
833 "%.\\tmin%t0\\t%0, %1, %2;")
|
|
834
|
|
;; Floating-point maximum (SF/DF) -> "max".
835 (define_insn "smax<mode>3"
|
|
836 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
837 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
|
|
838 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
|
|
839 ""
|
|
840 "%.\\tmax%t0\\t%0, %1, %2;")
|
|
841
|
|
;; Floating-point absolute value (SF/DF) -> "abs".
842 (define_insn "abs<mode>2"
|
|
843 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
844 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
|
|
845 ""
|
|
846 "%.\\tabs%t0\\t%0, %1;")
|
|
847
|
|
;; Floating-point negation (SF/DF) -> "neg".
848 (define_insn "neg<mode>2"
|
|
849 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
850 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
|
|
851 ""
|
|
852 "%.\\tneg%t0\\t%0, %1;")
|
|
853
|
|
;; Floating-point square root (SF/DF) -> "sqrt", with the %# modifier escape.
854 (define_insn "sqrt<mode>2"
|
|
855 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
|
|
856 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
|
|
857 ""
|
|
858 "%.\\tsqrt%#%t0\\t%0, %1;")
|
|
859
|
|
;; Combined SFmode sine/cosine, enabled only under
;; flag_unsafe_math_optimizations.  Emits two sets: operand 0 receives
;; UNSPEC_COS of operand 2 and operand 1 receives UNSPEC_SIN of the same
;; input.  The input is passed through make_safe_from relative to operand 0
;; before emission, since the first set writes operand 0 and the second set
;; still reads the input.
860 (define_expand "sincossf3"
|
|
861 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
|
862 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
|
|
863 UNSPEC_COS))
|
|
864 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
|
|
865 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
|
|
866 "flag_unsafe_math_optimizations"
|
|
867 {
|
|
868 operands[2] = make_safe_from (operands[2], operands[0]);
|
|
869 })
|
|
870
|
|
;; SFmode sine (UNSPEC_SIN) -> "sin.approx"; available only under
;; flag_unsafe_math_optimizations.
871 (define_insn "sinsf2"
|
|
872 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
|
873 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
|
|
874 UNSPEC_SIN))]
|
|
875 "flag_unsafe_math_optimizations"
|
|
876 "%.\\tsin.approx%t0\\t%0, %1;")
|
|
877
|
|
;; SFmode cosine (UNSPEC_COS) -> "cos.approx"; available only under
;; flag_unsafe_math_optimizations.
878 (define_insn "cossf2"
|
|
879 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
|
|
880 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
|
|
881 UNSPEC_COS))]
|
|
882 "flag_unsafe_math_optimizations"
|
|
883 "%.\\tcos.approx%t0\\t%0, %1;")
|
|
884
|
|
;; SFmode base-2 logarithm through PTX "lg2.approx"; only available
;; under -funsafe-math-optimizations.
(define_insn "log2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF
          [(match_operand:SF 1 "nvptx_register_operand" "R")]
          UNSPEC_LOG2))]
  "flag_unsafe_math_optimizations"
  "%.\\tlg2.approx%t0\\t%0, %1;")
|
|
891
|
|
;; SFmode base-2 exponential through PTX "ex2.approx"; only available
;; under -funsafe-math-optimizations.
(define_insn "exp2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF
          [(match_operand:SF 1 "nvptx_register_operand" "R")]
          UNSPEC_EXP2))]
  "flag_unsafe_math_optimizations"
  "%.\\tex2.approx%t0\\t%0, %1;")
|
|
898
|
|
;; Conversions involving floating point
|
|
900
|
|
;; Widen an SFmode register to DFmode (float_extend) with PTX "cvt".
(define_insn "extendsfdf2"
  [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
        (float_extend:DF
          (match_operand:SF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%t0%t1\\t%0, %1;")
|
|
906
|
|
;; Narrow a DFmode register to SFmode (float_truncate) with PTX "cvt".
(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (float_truncate:SF
          (match_operand:DF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0%t1\\t%0, %1;")
|
|
912
|
|
;; Convert an unsigned SImode register to an SDFM-mode floating-point
;; value (unsigned_float); the source is printed with a ".u" type.
(define_insn "floatunssi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM
          (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
|
|
918
|
|
;; Convert a signed SImode register to an SDFM-mode floating-point
;; value (float); the source is printed with a ".s" type.
(define_insn "floatsi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM
          (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
|
|
924
|
|
;; Convert an unsigned DImode register to an SDFM-mode floating-point
;; value (unsigned_float); the source is printed with a ".u" type.
(define_insn "floatunsdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM
          (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
|
|
930
|
|
;; Convert a signed DImode register to an SDFM-mode floating-point
;; value (float); the source is printed with a ".s" type.
(define_insn "floatdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM
          (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
|
|
936
|
|
;; Convert an SDFM-mode register to an unsigned SImode integer
;; (unsigned_fix), using "cvt" with the ".rzi" modifier.
(define_insn "fixuns_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:SI
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
|
|
942
|
|
;; Convert an SDFM-mode register to a signed SImode integer (fix),
;; using "cvt" with the ".rzi" modifier.
(define_insn "fix_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (fix:SI
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
|
|
948
|
|
;; Convert an SDFM-mode register to an unsigned DImode integer
;; (unsigned_fix), using "cvt" with the ".rzi" modifier.
(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:DI
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
|
|
954
|
|
;; Convert an SDFM-mode register to a signed DImode integer (fix),
;; using "cvt" with the ".rzi" modifier.
(define_insn "fix_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (fix:DI
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
|
|
960
|
|
;; Round-to-integral operations that stay in the floating-point mode.
;; FPINT iterates over the four unspecs; fpint_name supplies the
;; pattern-name stem and fpint_roundingmode the modifier spliced into
;; the "cvt" mnemonic for each of them.
(define_int_iterator FPINT
  [UNSPEC_FPINT_FLOOR
   UNSPEC_FPINT_BTRUNC
   UNSPEC_FPINT_CEIL
   UNSPEC_FPINT_NEARBYINT])

(define_int_attr fpint_name
  [(UNSPEC_FPINT_FLOOR "floor")
   (UNSPEC_FPINT_BTRUNC "btrunc")
   (UNSPEC_FPINT_CEIL "ceil")
   (UNSPEC_FPINT_NEARBYINT "nearbyint")])

(define_int_attr fpint_roundingmode
  [(UNSPEC_FPINT_FLOOR ".rmi")
   (UNSPEC_FPINT_BTRUNC ".rzi")
   (UNSPEC_FPINT_CEIL ".rpi")
   (UNSPEC_FPINT_NEARBYINT "%#i")])

;; One insn per (FPINT, SDFM) pair: source and destination share the
;; same SDFM mode.
(define_insn "<FPINT:fpint_name><SDFM:mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDFM
          [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
          FPINT))]
  ""
  "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
|
|
978
|
|
;; Floor/ceil conversions from an SDFM-mode value to an SDIM-mode
;; integer.  FPINT2 covers only the floor and ceil unspecs;
;; fpint2_name gives the "lfloor"/"lceil" name stem and
;; fpint2_roundingmode the matching "cvt" modifier.
(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])

(define_int_attr fpint2_name
  [(UNSPEC_FPINT_FLOOR "lfloor")
   (UNSPEC_FPINT_CEIL "lceil")])

(define_int_attr fpint2_roundingmode
  [(UNSPEC_FPINT_FLOOR ".rmi")
   (UNSPEC_FPINT_CEIL ".rpi")])

;; The result is printed with a signed (".s") integer type.
(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec:SDIM
          [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
          FPINT2))]
  ""
  "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
|
|
991
|
|
;; Miscellaneous
|
|
993
|
|
;; The "nop" pattern: matches (const_int 0) and emits no text at all.
(define_insn "nop"
  [(const_int 0)]
  ""
  "")
|
|
998
|
131
|
;; Matches (const_int 1) and emits the PTX "exit;" instruction.
(define_insn "exit"
  [(const_int 1)]
  ""
  "exit;")
|
|
1003
|
|
;; Matches (const_int 2).  The template is a braced PTX block that
;; declares two .u32 registers, %nop_src and %nop_dst, and moves one
;; into the other ("%%" is a literal "%" in an output template).
;; NOTE(review): the template is a multi-line string literal; its
;; inner whitespace is left exactly as found here.
1004 (define_insn "fake_nop"
|
|
1005 [(const_int 2)]
|
|
1006 ""
|
|
1007 "{
|
|
1008 .reg .u32 %%nop_src;
|
|
1009 .reg .u32 %%nop_dst;
|
|
1010 mov.u32 %%nop_dst, %%nop_src;
|
|
1011 }")
|
|
1012
|
111
|
;; Function return.  The assembly text is produced by
;; nvptx_output_return; the insn is marked non-predicable.
(define_insn "return"
  [(return)]
  ""
{
  const char *ret_asm = nvptx_output_return ();
  return ret_asm;
}
  [(set_attr "predicable" "false")])
|
|
1020
|
|
;; Function epilogue.  With TARGET_SOFT_STACK, first emit
;; set_softstack_insn on the SOFTSTACK_PREV_REGNUM register; then
;; always emit the return jump.
(define_expand "epilogue"
  [(clobber (const_int 0))]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      rtx prev_sp = gen_rtx_REG (Pmode, SOFTSTACK_PREV_REGNUM);
      emit_insn (gen_set_softstack_insn (prev_sp));
    }
  emit_jump_insn (gen_return ());
  DONE;
})
|
|
1031
|
|
;; Nonlocal goto is not implemented for this target: report it with
;; sorry () and emit a nop in place of the jump.
(define_expand "nonlocal_goto"
  [(match_operand 0 "" "")
   (match_operand 1 "" "")
   (match_operand 2 "" "")
   (match_operand 3 "" "")]
  ""
{
  sorry ("target cannot support nonlocal goto.");
  rtx placeholder = gen_nop ();
  emit_insn (placeholder);
  DONE;
})
|
|
1043
|
|
;; Receiver side of a nonlocal goto: likewise unsupported.  There is no
;; DONE here, so after sorry () the (const_int 0) template is what gets
;; emitted.
(define_expand "nonlocal_goto_receiver"
  [(const_int 0)]
  ""
{
  sorry ("target cannot support nonlocal goto.");
})
|
|
1050
|
|
;; Dynamic stack allocation.  Operand 1 is the size to allocate and
;; operand 0 receives virtual_stack_dynamic_rtx afterwards.  Only the
;; soft-stack configuration can do this; otherwise a sorry () is
;; issued and a nop emitted.
(define_expand "allocate_stack"
  [(match_operand 0 "nvptx_register_operand")
   (match_operand 1 "nvptx_register_operand")]
  ""
{
  if (!TARGET_SOFT_STACK)
    {
      /* The ptx documentation specifies an alloca intrinsic (for 32 bit
         only) but notes it is not implemented.  The assembler emits a
         confused error message.  Issue a blunt one now instead.  */
      sorry ("target cannot support alloca.");
      emit_insn (gen_nop ());
      DONE;
    }

  /* Move the stack pointer down by the requested size, publish the
     new value, then hand back the dynamic-area address.  */
  rtx lowered_sp = gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]);
  emit_move_insn (stack_pointer_rtx, lowered_sp);
  emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
  DONE;
})
|
|
1071
|
|
;; Soft-stack only: the text comes from nvptx_output_set_softstack,
;; which is given the hard register number of operand 0.
(define_insn "set_softstack_insn"
  [(unspec [(match_operand 0 "nvptx_register_operand" "R")]
           UNSPEC_SET_SOFTSTACK)]
  "TARGET_SOFT_STACK"
{
  unsigned int src_regno = REGNO (operands[0]);
  return nvptx_output_set_softstack (src_regno);
})
|
|
1079
|
|
;; Restore the stack pointer at block exit.  With the soft stack,
;; copy operand 1 into operand 0 and emit set_softstack_insn on it;
;; without it nothing is emitted.
(define_expand "restore_stack_block"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  rtx sp_dest = operands[0];
  rtx saved_sp = operands[1];

  if (TARGET_SOFT_STACK)
    {
      emit_move_insn (sp_dest, saved_sp);
      emit_insn (gen_set_softstack_insn (sp_dest));
    }
  DONE;
})
|
|
1092
|
|
;; Function-level stack restore: deliberately expands to nothing.
(define_expand "restore_stack_function"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  /* Both operands are ignored.  */
  DONE;
})
|
|
1100
|
|
;; Unconditional trap: emits PTX "trap" immediately followed by "exit".
(define_insn "trap"
  [(trap_if (const_int 1)
            (const_int 0))]
  ""
  "trap; exit;")
|
111
|
1105
|
|
;; Trap when BImode operand 0 is non-zero.  Both the "trap" and the
;; "exit" are prefixed with the %j0 form of the operand; the insn
;; itself is marked non-predicable.
(define_insn "trap_if_true"
  [(trap_if
     (ne (match_operand:BI 0 "nvptx_register_operand" "R")
         (const_int 0))
     (const_int 0))]
  ""
  "%j0 trap; %j0 exit;"
  [(set_attr "predicable" "false")])
|
|
1113
|
|
;; Trap when BImode operand 0 is zero.  Both the "trap" and the "exit"
;; are prefixed with the %J0 form of the operand; the insn itself is
;; marked non-predicable.
(define_insn "trap_if_false"
  [(trap_if
     (eq (match_operand:BI 0 "nvptx_register_operand" "R")
         (const_int 0))
     (const_int 0))]
  ""
  "%J0 trap; %J0 exit;"
  [(set_attr "predicable" "false")])
|
|
1121
|
|
;; Conditional trap on an SDIM-mode comparison.  The comparison in
;; operand 0 is expanded by nvptx_expand_compare and the result is
;; fed to trap_if_true.
(define_expand "ctrap<mode>4"
  [(trap_if
     (match_operator 0 "nvptx_comparison_operator"
       [(match_operand:SDIM 1 "nvptx_register_operand")
        (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
     (match_operand 3 "const0_operand"))]
  ""
{
  rtx trap_pred = nvptx_expand_compare (operands[0]);
  emit_insn (gen_trap_if_true (trap_pred));
  DONE;
})
|
|
1133
|
|
;; Load the size of an OpenACC dimension into SImode operand 0.
;; Operand 1 is a constant used directly as an index into the table of
;; templates below (no range check is performed here).
(define_insn "oacc_dim_size"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
                   UNSPEC_DIM_SIZE))]
  ""
{
  /* Must match oacc_loop_levels ordering.  */
  static const char *const size_templates[] =
    {
      "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
    };
  HOST_WIDE_INT level = INTVAL (operands[1]);
  return size_templates[level];
})
|
|
1148
|
|
;; Load the position within an OpenACC dimension into SImode
;; operand 0.  Operand 1 is a constant used directly as an index into
;; the table of templates below (no range check is performed here).
;; Unlike oacc_dim_size this is an unspec_volatile.
(define_insn "oacc_dim_pos"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
                            UNSPECV_DIM_POS))]
  ""
{
  /* Must match oacc_loop_levels ordering.  */
  static const char *const pos_templates[] =
    {
      "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
    };
  HOST_WIDE_INT level = INTVAL (operands[1]);
  return pos_templates[level];
})
|
|
1163
|
|
;; Marker insn for UNSPECV_FORK.  It only emits a "// fork N;" comment
;; line in the assembly, where N is constant operand 0.
(define_insn "nvptx_fork"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "const_int_operand" "")]
     UNSPECV_FORK)]
  ""
  "// fork %0;"
  [(set_attr "predicable" "false")])
|
|
1170
|
|
;; Marker insn for UNSPECV_FORKED.  It only emits a "// forked N;"
;; comment line in the assembly, where N is constant operand 0.
(define_insn "nvptx_forked"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "const_int_operand" "")]
     UNSPECV_FORKED)]
  ""
  "// forked %0;"
  [(set_attr "predicable" "false")])
|
|
1177
|
|
;; Marker insn for UNSPECV_JOINING.  It only emits a "// joining N;"
;; comment line in the assembly, where N is constant operand 0.
(define_insn "nvptx_joining"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "const_int_operand" "")]
     UNSPECV_JOINING)]
  ""
  "// joining %0;"
  [(set_attr "predicable" "false")])
|
|
1184
|
|
;; Marker insn for UNSPECV_JOIN.  It only emits a "// join N;" comment
;; line in the assembly, where N is constant operand 0.
(define_insn "nvptx_join"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "const_int_operand" "")]
     UNSPECV_JOIN)]
  ""
  "// join %0;"
  [(set_attr "predicable" "false")])
|
|
1191
|
|
;; OpenACC fork.  Unless operand 0 is const0_rtx, operand 1 is first
;; copied into it; the fork itself is expanded by
;; nvptx_expand_oacc_fork using the constant in operand 2.
(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
{
  rtx dest = operands[0];

  if (dest != const0_rtx)
    emit_move_insn (dest, operands[1]);
  nvptx_expand_oacc_fork (INTVAL (operands[2]));
  DONE;
})
|
|
1204
|
|
;; OpenACC join.  Unless operand 0 is const0_rtx, operand 1 is first
;; copied into it; the join itself is expanded by
;; nvptx_expand_oacc_join using the constant in operand 2.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_JOIN)]
  ""
{
  rtx dest = operands[0];

  if (dest != const0_rtx)
    emit_move_insn (dest, operands[1]);
  nvptx_expand_oacc_join (INTVAL (operands[2]));
  DONE;
})
|
|
1217
|
|
;; Only 32-bit shuffles exist, so the instruction is always emitted
;; with a ".b32" type.  Operand 1 is the value, operand 2 the lane
;; argument, and constant operand 3 selects the shuffle variant that
;; is printed after "shfl"; the final instruction argument is the
;; literal 31.
(define_insn "nvptx_shuffle<mode>"
  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
        (unspec:BITS
          [(match_operand:BITS 1 "nvptx_register_operand" "R")
           (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_SHUFFLE))]
  ""
  "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
|
|
1228
|
|
;; PTX "vote.ballot.b32": SImode operand 0 is computed from BImode
;; predicate operand 1.
(define_insn "nvptx_vote_ballot"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI
          [(match_operand:BI 1 "nvptx_register_operand" "R")]
          UNSPEC_VOTE_BALLOT))]
  ""
  "%.\\tvote.ballot.b32\\t%0, %1;")
|
|
1235
|
|
;; Patterns for OpenMP SIMD-via-SIMT lowering
|
|
1237
|
|
;; Insn behind the omp_simt_enter expander.  All three operands are
;; handed to nvptx_output_simt_enter, which produces the text.
(define_insn "omp_simt_enter_insn"
  [(set (match_operand 0 "nvptx_register_operand" "=R")
        (unspec_volatile
          [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
           (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
          UNSPECV_SIMT_ENTER))]
  ""
{
  rtx dest = operands[0];
  rtx size = operands[1];
  rtx align = operands[2];
  return nvptx_output_simt_enter (dest, size, align);
})
|
|
1247
|
|
;; Expander for SIMT region entry.  Before emitting
;; omp_simt_enter_insn it updates the per-function bookkeeping in
;; cfun->machine:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U when operand 1 is not
;;    a CONST_INT, otherwise the maximum of its old value and operand 1;
;;  - simt_stack_align becomes the maximum of its old value and
;;    operand 2;
;;  - has_simtreg is set.
(define_expand "omp_simt_enter"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
   (match_operand 2 "const_int_operand" "n")]
  ""
{
  struct machine_function *mach = cfun->machine;

  if (CONST_INT_P (operands[1]))
    mach->simt_stack_size = MAX (UINTVAL (operands[1]),
                                 mach->simt_stack_size);
  else
    mach->simt_stack_size = HOST_WIDE_INT_M1U;
  mach->simt_stack_align = MAX (UINTVAL (operands[2]),
                                mach->simt_stack_align);
  mach->has_simtreg = true;
  emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
  DONE;
})
|
|
1265
|
|
;; SIMT region exit.  The text is produced by nvptx_output_simt_exit
;; from register operand 0.
(define_insn "omp_simt_exit"
  [(unspec_volatile
     [(match_operand 0 "nvptx_register_operand" "R")]
     UNSPECV_SIMT_EXIT)]
  ""
{
  rtx simt_reg = operands[0];
  return nvptx_output_simt_exit (simt_reg);
})
|
|
1273
|
|
;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to the lane index, read
;; from the PTX %laneid special register.
(define_insn "omp_simt_lane"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(const_int 0)]
                   UNSPEC_LANEID))]
  ""
  "%.\\tmov.u32\\t%0, %%laneid;")
|
|
1280
|
|
;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0, then
;; emit nvptx_nounroll as a compiler barrier that disallows
;; unrolling/peeling of the containing loop.
(define_expand "omp_simt_ordered"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  emit_insn (gen_nvptx_nounroll ());
  DONE;
})
|
|
1292
|
|
;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; across lanes.  The insn sequence comes from nvptx_gen_shuffle with
;; SHUFFLE_BFLY.
(define_expand "omp_simt_xchg_bfly"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  rtx shuffle = nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                   SHUFFLE_BFLY);
  emit_insn (shuffle);
  DONE;
})
|
|
1305
|
|
;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast the value in operand 1
;; from the lane whose index is given in operand 2 to operand 0 in all
;; lanes.  The insn sequence comes from nvptx_gen_shuffle with
;; SHUFFLE_IDX.
(define_expand "omp_simt_xchg_idx"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  rtx shuffle = nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                   SHUFFLE_IDX);
  emit_insn (shuffle);
  DONE;
})
|
|
1318
|
|
;; Implement IFN_GOMP_SIMT_VOTE_ANY:
;; set operand 0 to zero iff all lanes supply zero in operand 1.
;; Operand 1 is first compared against zero into a fresh BImode
;; predicate, which is then fed to nvptx_vote_ballot.
(define_expand "omp_simt_vote_any"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx nonzero_p = gen_reg_rtx (BImode);
  rtx test = gen_rtx_NE (BImode, operands[1], const0_rtx);

  emit_move_insn (nonzero_p, test);
  emit_insn (gen_nvptx_vote_ballot (operands[0], nonzero_p));
  DONE;
})
|
|
1331
|
|
;; Implement IFN_GOMP_SIMT_LAST_LANE:
;; set operand 0 to the lowest lane index that passed non-zero in
;; operand 1.  The ballot of "operand 1 != 0" is taken into a
;; temporary and ctzsi2 is applied to that ballot mask.
(define_expand "omp_simt_last_lane"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  /* Keep the allocation order: predicate first, ballot mask second.  */
  rtx nonzero_p = gen_reg_rtx (BImode);
  rtx ballot = gen_reg_rtx (SImode);

  emit_move_insn (nonzero_p, gen_rtx_NE (BImode, operands[1], const0_rtx));
  emit_insn (gen_nvptx_vote_ballot (ballot, nonzero_p));
  emit_insn (gen_ctzsi2 (operands[0], ballot));
  DONE;
})
|
|
1346
|
|
;; Extract the parts of a 64-bit object (operand 2) into two 32-bit
;; ints: part 0 goes to operand 0 and part 1 to operand 1, using a
;; single "mov.b64" with a brace-enclosed register pair as destination.
(define_insn "unpack<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI
          [(match_operand:BITD 2 "nvptx_register_operand" "R")
           (const_int 0)]
          UNSPEC_BIT_CONV))
   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
        (unspec:SI
          [(match_dup 2)
           (const_int 1)]
          UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t{%0,%1}, %2;")
|
|
1356
|
|
;; Pack two 32-bit ints (operands 1 and 2) into a 64-bit object
;; (operand 0), using a single "mov.b64" with a brace-enclosed
;; register pair as source.
(define_insn "packsi<mode>2"
  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
        (unspec:BITD
          [(match_operand:SI 1 "nvptx_register_operand" "R")
           (match_operand:SI 2 "nvptx_register_operand" "R")]
          UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t%0, {%1,%2};")
|
|
1365
|
|
;; Atomic insns.
|
|
1367
|
|
;; Compare-and-swap expander.  It emits the _1 insn to perform the
;; operation (passing the success model, operand 6; is_weak and the
;; failure model are not used), then derives the boolean result by
;; comparing the returned old value against the expected value.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "nvptx_register_operand")     ;; bool success output
   (match_operand:SDIM 1 "nvptx_register_operand")   ;; oldval output
   (match_operand:SDIM 2 "memory_operand")           ;; memory
   (match_operand:SDIM 3 "nvptx_register_operand")   ;; expected input
   (match_operand:SDIM 4 "nvptx_register_operand")   ;; newval input
   (match_operand:SI 5 "const_int_operand")          ;; is_weak
   (match_operand:SI 6 "const_int_operand")          ;; success model
   (match_operand:SI 7 "const_int_operand")]         ;; failure model
  ""
{
  emit_insn (gen_atomic_compare_and_swap<mode>_1
    (operands[1], operands[2], operands[3], operands[4], operands[6]));

  /* success = (oldval == expected) ? 1 : 0.  */
  rtx matched_p = gen_reg_rtx (BImode);
  emit_move_insn (matched_p, gen_rtx_EQ (BImode, operands[1], operands[3]));
  emit_insn (gen_sel_truesi (operands[0], matched_p, GEN_INT (1), GEN_INT (0)));
  DONE;
})
|
|
1387
|
|
;; The compare-and-swap insn proper, emitted as PTX "atom.cas".
;; Operand 0 receives the result, operand 1 is the memory location
;; (also written), operands 2 and 3 are the compare and swap values,
;; and operand 4 is a constant that does not appear in the template.
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")
           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 4 "const_int_operand")]
          UNSPECV_CAS))
   (set (match_dup 1)
        (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
  ""
  "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
  [(set_attr "atomic" "true")])
|
|
1401
|
|
;; Atomic exchange, emitted as PTX "atom.exch": operand 0 receives the
;; result of the operation on memory operand 1, and operand 2 is
;; stored to that memory.  Operand 3 (the model) does not appear in
;; the template.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")     ;; output
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")          ;; memory
           (match_operand:SI 3 "const_int_operand")]             ;; model
          UNSPECV_XCHG))
   (set (match_dup 1)
        (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))]  ;; input
  ""
  "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
  [(set_attr "atomic" "true")])
|
|
1413
|
|
;; Atomic fetch-and-add on an SDIM-mode memory location, emitted as
;; PTX "atom.add".  Memory operand 1 is updated with itself plus
;; operand 2, and operand 0 is set from the memory operand.  Operand 3
;; (the model) does not appear in the template.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(plus:SDIM
             (match_dup 1)
             (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]              ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
  "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
  [(set_attr "atomic" "true")])
|
|
1426
|
|
;; SFmode variant of atomic fetch-and-add, emitted as PTX "atom.add".
;; Memory operand 1 is updated with itself plus operand 2, and
;; operand 0 is set from the memory operand.  Operand 3 (the model)
;; does not appear in the template.
(define_insn "atomic_fetch_addsf"
  [(set (match_operand:SF 1 "memory_operand" "+m")
        (unspec_volatile:SF
          [(plus:SF
             (match_dup 1)
             (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
           (match_operand:SI 3 "const_int_operand")]              ;; model
          UNSPECV_LOCK))
   (set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
  "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
  [(set_attr "atomic" "true")])
|
|
1439
|
|
;; Bitwise atomics.  any_logic iterates over and/ior/xor and the
;; "logic" attribute maps each code to the name used both in the
;; pattern name and in the PTX mnemonic.
(define_code_iterator any_logic [and ior xor])

(define_code_attr logic
  [(and "and")
   (ior "or")
   (xor "xor")])

;; Atomic fetch-and-<logic> on an SDIM-mode memory location.  Enabled
;; for SImode always, and for the other SDIM mode only with
;; TARGET_SM35.  Operand 3 (the model) does not appear in the
;; template.
(define_insn "atomic_fetch_<logic><mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(any_logic:SDIM
             (match_dup 1)
             (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]              ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  "<MODE>mode == SImode || TARGET_SM35"
  "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
  [(set_attr "atomic" "true")])
|
|
1455
|
|
;; Emits PTX "bar.sync N", where N is constant operand 0.  Not
;; predicable.
(define_insn "nvptx_barsync"
  [(unspec_volatile
     [(match_operand:SI 0 "const_int_operand" "")]
     UNSPECV_BARSYNC)]
  ""
  "\\tbar.sync\\t%0;"
  [(set_attr "predicable" "false")])
|
|
1462
|
131
|
;; Expander for the memory_barrier pattern.  Operand 0 is created here
;; as a volatile BLKmode MEM over a scratch address; the template then
;; wraps it in an UNSPECV_MEMBAR set.
(define_expand "memory_barrier"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)]
                             UNSPECV_MEMBAR))]
  ""
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
|
|
1471
|
|
;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
;; (corresponding to cuda functions threadfence_block, threadfence and
;; threadfence_system).  For the insn memory_barrier we use membar.sys.
;; This may be overconservative, but before using membar.gl instead
;; we'll need to explain in detail why it's safe to use.  For now, use
;; membar.sys.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)]
                             UNSPECV_MEMBAR))]
  ""
  "\\tmembar.sys;"
  [(set_attr "predicable" "false")])
|
|
1483
|
|
;; Expander for a membar.cta barrier.  Operand 0 is created here as a
;; volatile BLKmode MEM over a scratch address; the template then
;; wraps it in an UNSPECV_MEMBAR_CTA set.
(define_expand "nvptx_membar_cta"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)]
                             UNSPECV_MEMBAR_CTA))]
  ""
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
|
|
1492
|
|
;; Matches the UNSPECV_MEMBAR_CTA set built by the nvptx_membar_cta
;; expander and emits PTX "membar.cta".  Not predicable.
(define_insn "*nvptx_membar_cta"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)]
                             UNSPECV_MEMBAR_CTA))]
  ""
  "\\tmembar.cta;"
  [(set_attr "predicable" "false")])
|
|
1499
|
111
|
;; Emits the PTX directive .pragma "nounroll".  Being an
;; unspec_volatile, it also acts as a barrier for the compiler.  Not
;; predicable.
(define_insn "nvptx_nounroll"
  [(unspec_volatile [(const_int 0)]
                    UNSPECV_NOUNROLL)]
  ""
  "\\t.pragma \\\"nounroll\\\";"
  [(set_attr "predicable" "false")])
|