145
|
1 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
|
|
2
|
|
3 ;; This file is free software; you can redistribute it and/or modify it under
|
|
4 ;; the terms of the GNU General Public License as published by the Free
|
|
5 ;; Software Foundation; either version 3 of the License, or (at your option)
|
|
6 ;; any later version.
|
|
7
|
|
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
|
|
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
11 ;; for more details.
|
|
12
|
|
13 ;; You should have received a copy of the GNU General Public License
|
|
14 ;; along with GCC; see the file COPYING3. If not see
|
|
15 ;; <http://www.gnu.org/licenses/>.
|
|
16
|
|
17 ;; {{{ Vector iterators
|
|
18
|
|
19 ; Vector modes for one vector register
|
|
20 (define_mode_iterator VEC_1REG_MODE
|
|
21 [V64SI V64HF V64SF])
|
|
22 (define_mode_iterator VEC_1REG_ALT
|
|
23 [V64SI V64HF V64SF])
|
|
24 (define_mode_iterator VEC_ALL1REG_MODE
|
|
25 [V64QI V64HI V64SI V64HF V64SF])
|
|
26
|
|
27 (define_mode_iterator VEC_1REG_INT_MODE
|
|
28 [V64SI])
|
|
29 (define_mode_iterator VEC_ALL1REG_INT_MODE
|
|
30 [V64QI V64HI V64SI])
|
|
31 (define_mode_iterator VEC_ALL1REG_INT_ALT
|
|
32 [V64QI V64HI V64SI])
|
|
33
|
|
34 ; Vector modes for two vector registers
|
|
35 (define_mode_iterator VEC_2REG_MODE
|
|
36 [V64DI V64DF])
|
|
37
|
|
38 ; All of above
|
|
39 (define_mode_iterator VEC_REG_MODE
|
|
40 [V64SI V64HF V64SF ; Single reg
|
|
41 V64DI V64DF]) ; Double reg
|
|
42 (define_mode_iterator VEC_ALLREG_MODE
|
|
43 [V64QI V64HI V64SI V64HF V64SF ; Single reg
|
|
44 V64DI V64DF]) ; Double reg
|
|
45 (define_mode_iterator VEC_ALLREG_ALT
|
|
46 [V64QI V64HI V64SI V64HF V64SF ; Single reg
|
|
47 V64DI V64DF]) ; Double reg
|
|
48 (define_mode_iterator VEC_ALLREG_INT_MODE
|
|
49 [V64QI V64HI V64SI ; Single reg
|
|
50 V64DI]) ; Double reg
|
|
51
|
|
52 (define_mode_attr scalar_mode
|
|
53 [(V64QI "qi") (V64HI "hi") (V64SI "si")
|
|
54 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
|
|
55
|
|
56 (define_mode_attr SCALAR_MODE
|
|
57 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
|
|
58 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
|
|
59
|
|
60 (define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
|
|
61
|
|
62 ;; }}}
|
|
63 ;; {{{ Substitutions
|
|
64
|
|
65 (define_subst_attr "exec" "vec_merge"
|
|
66 "" "_exec")
|
|
67 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
|
|
68 "" "_exec")
|
|
69 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
|
|
70 "" "_exec")
|
|
71 (define_subst_attr "exec_scatter" "scatter_store"
|
|
72 "" "_exec")
|
|
73
|
|
74 (define_subst "vec_merge"
|
|
75 [(set (match_operand:VEC_ALLREG_MODE 0)
|
|
76 (match_operand:VEC_ALLREG_MODE 1))]
|
|
77 ""
|
|
78 [(set (match_dup 0)
|
|
79 (vec_merge:VEC_ALLREG_MODE
|
|
80 (match_dup 1)
|
|
81 (match_operand:VEC_ALLREG_MODE 3
|
|
82 "gcn_register_or_unspec_operand" "U0")
|
|
83 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
|
|
84
|
|
85 (define_subst "vec_merge_with_clobber"
|
|
86 [(set (match_operand:VEC_ALLREG_MODE 0)
|
|
87 (match_operand:VEC_ALLREG_MODE 1))
|
|
88 (clobber (match_operand 2))]
|
|
89 ""
|
|
90 [(set (match_dup 0)
|
|
91 (vec_merge:VEC_ALLREG_MODE
|
|
92 (match_dup 1)
|
|
93 (match_operand:VEC_ALLREG_MODE 3
|
|
94 "gcn_register_or_unspec_operand" "U0")
|
|
95 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
|
|
96 (clobber (match_dup 2))])
|
|
97
|
|
98 (define_subst "vec_merge_with_vcc"
|
|
99 [(set (match_operand:VEC_ALLREG_MODE 0)
|
|
100 (match_operand:VEC_ALLREG_MODE 1))
|
|
101 (set (match_operand:DI 2)
|
|
102 (match_operand:DI 3))]
|
|
103 ""
|
|
104 [(parallel
|
|
105 [(set (match_dup 0)
|
|
106 (vec_merge:VEC_ALLREG_MODE
|
|
107 (match_dup 1)
|
|
108 (match_operand:VEC_ALLREG_MODE 4
|
|
109 "gcn_register_or_unspec_operand" "U0")
|
|
110 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
|
|
111 (set (match_dup 2)
|
|
112 (and:DI (match_dup 3)
|
|
113 (reg:DI EXEC_REG)))])])
|
|
114
|
|
115 (define_subst "scatter_store"
|
|
116 [(set (mem:BLK (scratch))
|
|
117 (unspec:BLK
|
|
118 [(match_operand 0)
|
|
119 (match_operand 1)
|
|
120 (match_operand 2)
|
|
121 (match_operand 3)]
|
|
122 UNSPEC_SCATTER))]
|
|
123 ""
|
|
124 [(set (mem:BLK (scratch))
|
|
125 (unspec:BLK
|
|
126 [(match_dup 0)
|
|
127 (match_dup 1)
|
|
128 (match_dup 2)
|
|
129 (match_dup 3)
|
|
130 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
|
|
131 UNSPEC_SCATTER))])
|
|
132
|
|
133 ;; }}}
|
|
134 ;; {{{ Vector moves
|
|
135
|
|
136 ; This is the entry point for all vector register moves. Memory accesses can
|
|
137 ; come this way also, but will more usually use the reload_in/out,
|
|
138 ; gather/scatter, maskload/store, etc.
|
|
139
|
|
140 (define_expand "mov<mode>"
|
|
141 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
|
|
142 (match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
|
|
143 ""
|
|
144 {
|
|
145 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
|
|
146 {
|
|
147 operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
148 rtx scratch = gen_rtx_SCRATCH (V64DImode);
|
|
149 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
|
|
150 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
|
|
151 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
|
|
152 operands[0],
|
|
153 scratch);
|
|
154 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
|
|
155 DONE;
|
|
156 }
|
|
157 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
|
|
158 {
|
|
159 rtx scratch = gen_rtx_SCRATCH (V64DImode);
|
|
160 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
|
|
161 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
|
|
162 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
|
|
163 operands[1],
|
|
164 scratch);
|
|
165 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
|
|
166 DONE;
|
|
167 }
|
|
168 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
|
|
169 {
|
|
170 gcc_assert (!reload_completed);
|
|
171 rtx scratch = gen_reg_rtx (V64DImode);
|
|
172 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
|
|
173 DONE;
|
|
174 }
|
|
175 })
|
|
176
|
|
177 ; A pseudo instruction that helps LRA use the "U0" constraint.
|
|
178
|
|
179 (define_insn "mov<mode>_unspec"
|
|
180 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
|
|
181 (match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand" " U"))]
|
|
182 ""
|
|
183 ""
|
|
184 [(set_attr "type" "unknown")
|
|
185 (set_attr "length" "0")])
|
|
186
|
|
187 (define_insn "*mov<mode>"
|
|
188 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
|
|
189 (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B"))]
|
|
190 ""
|
|
191 "v_mov_b32\t%0, %1"
|
|
192 [(set_attr "type" "vop1,vop1")
|
|
193 (set_attr "length" "4,8")])
|
|
194
|
|
195 (define_insn "mov<mode>_exec"
|
|
196 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
|
|
197 "=v, v, v, v, v, m")
|
|
198 (vec_merge:VEC_ALL1REG_MODE
|
|
199 (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
|
|
200 "vA, B, v,vA, m, v")
|
|
201 (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
|
|
202 "U0,U0,vA,vA,U0,U0")
|
|
203 (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
|
|
204 (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
|
|
205 "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
206 "@
|
|
207 v_mov_b32\t%0, %1
|
|
208 v_mov_b32\t%0, %1
|
|
209 v_cndmask_b32\t%0, %3, %1, vcc
|
|
210 v_cndmask_b32\t%0, %3, %1, %2
|
|
211 #
|
|
212 #"
|
|
213 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
|
|
214 (set_attr "length" "4,8,4,8,16,16")])
|
|
215
|
|
216 ; This variant does not accept an unspec, but does permit MEM
|
|
217 ; read/modify/write which is necessary for maskstore.
|
|
218
|
|
219 ;(define_insn "*mov<mode>_exec_match"
|
|
220 ; [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
|
|
221 ; "=v,v, v, m")
|
|
222 ; (vec_merge:VEC_ALL1REG_MODE
|
|
223 ; (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
|
|
224 ; (match_dup 0)
|
|
225 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
|
|
226 ; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
|
|
227 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
228 ; "@
|
|
229 ; v_mov_b32\t%0, %1
|
|
230 ; v_mov_b32\t%0, %1
|
|
231 ; #
|
|
232 ; #"
|
|
233 ; [(set_attr "type" "vop1,vop1,*,*")
|
|
234 ; (set_attr "length" "4,8,16,16")])
|
|
235
|
|
236 (define_insn "*mov<mode>"
|
|
237 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
|
|
238 (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))]
|
|
239 ""
|
|
240 {
|
|
241 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
|
|
242 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
|
|
243 else
|
|
244 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
|
|
245 }
|
|
246 [(set_attr "type" "vmult")
|
|
247 (set_attr "length" "16")])
|
|
248
|
|
249 (define_insn "mov<mode>_exec"
|
|
250 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
|
|
251 "= v, v, v, v, m")
|
|
252 (vec_merge:VEC_2REG_MODE
|
|
253 (match_operand:VEC_2REG_MODE 1 "general_operand"
|
|
254 "vDB, v0, v0, m, v")
|
|
255 (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
|
|
256 " U0,vDA0,vDA0,U0,U0")
|
|
257 (match_operand:DI 2 "register_operand" " e, cV, Sv, e, e")))
|
|
258 (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))]
|
|
259 "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
260 {
|
|
261 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
|
|
262 switch (which_alternative)
|
|
263 {
|
|
264 case 0:
|
|
265 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
|
|
266 case 1:
|
|
267 return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
|
|
268 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
|
|
269 case 2:
|
|
270 return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
|
|
271 "v_cndmask_b32\t%H0, %H3, %H1, %2";
|
|
272 }
|
|
273 else
|
|
274 switch (which_alternative)
|
|
275 {
|
|
276 case 0:
|
|
277 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
|
|
278 case 1:
|
|
279 return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
|
|
280 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
|
|
281 case 2:
|
|
282 return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
|
|
283 "v_cndmask_b32\t%L0, %L3, %L1, %2";
|
|
284 }
|
|
285
|
|
286 return "#";
|
|
287 }
|
|
288 [(set_attr "type" "vmult,vmult,vmult,*,*")
|
|
289 (set_attr "length" "16,16,16,16,16")])
|
|
290
|
|
291 ; This variant does not accept an unspec, but does permit MEM
|
|
292 ; read/modify/write which is necessary for maskstore.
|
|
293
|
|
294 ;(define_insn "*mov<mode>_exec_match"
|
|
295 ; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
|
|
296 ; (vec_merge:VEC_2REG_MODE
|
|
297 ; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
|
|
298 ; (match_dup 0)
|
|
299 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
|
|
300 ; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
|
|
301 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
|
|
302 ; "@
|
|
303 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
|
|
304 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
|
|
305 ; else \
|
|
306 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
|
|
307 ; #
|
|
308 ; #"
|
|
309 ; [(set_attr "type" "vmult,*,*")
|
|
310 ; (set_attr "length" "16,16,16")])
|
|
311
|
|
312 ; A SGPR-base load looks like:
|
|
313 ; <load> v, Sv
|
|
314 ;
|
|
315 ; There's no hardware instruction that corresponds to this, but vector base
|
|
316 ; addresses are placed in an SGPR because it is easier to add to a vector.
|
|
317 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
|
|
318 ;
|
|
319 ; Rewrite as:
|
|
320 ; vT = v1 << log2(element-size)
|
|
321 ; vT += Sv
|
|
322 ; flat_load v, vT
|
|
323
|
|
324 (define_insn "mov<mode>_sgprbase"
|
|
325 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
|
|
326 "= v, v, v, m")
|
|
327 (unspec:VEC_ALL1REG_MODE
|
|
328 [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
|
|
329 " vA,vB, m, v")]
|
|
330 UNSPEC_SGPRBASE))
|
|
331 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
|
|
332 "lra_in_progress || reload_completed"
|
|
333 "@
|
|
334 v_mov_b32\t%0, %1
|
|
335 v_mov_b32\t%0, %1
|
|
336 #
|
|
337 #"
|
|
338 [(set_attr "type" "vop1,vop1,*,*")
|
|
339 (set_attr "length" "4,8,12,12")])
|
|
340
|
|
341 (define_insn "mov<mode>_sgprbase"
|
|
342 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
|
|
343 (unspec:VEC_2REG_MODE
|
|
344 [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")]
|
|
345 UNSPEC_SGPRBASE))
|
|
346 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))]
|
|
347 "lra_in_progress || reload_completed"
|
|
348 "@
|
|
349 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
|
|
350 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
|
|
351 else \
|
|
352 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
|
|
353 #
|
|
354 #"
|
|
355 [(set_attr "type" "vmult,*,*")
|
|
356 (set_attr "length" "8,12,12")])
|
|
357
|
|
358 ; reload_in was once a standard name, but here it's only referenced by
|
|
359 ; gcn_secondary_reload. It allows a reload with a scratch register.
|
|
360
|
|
361 (define_expand "reload_in<mode>"
|
|
362 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
|
|
363 (match_operand:VEC_ALLREG_MODE 1 "memory_operand" " m"))
|
|
364 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
|
|
365 ""
|
|
366 {
|
|
367 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
|
|
368 DONE;
|
|
369 })
|
|
370
|
|
371 ; reload_out is similar to reload_in, above.
|
|
372
|
|
373 (define_expand "reload_out<mode>"
|
|
374 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand" "= m")
|
|
375 (match_operand:VEC_ALLREG_MODE 1 "register_operand" " v"))
|
|
376 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
|
|
377 ""
|
|
378 {
|
|
379 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
|
|
380 DONE;
|
|
381 })
|
|
382
|
|
383 ; Expand scalar addresses into gather/scatter patterns
|
|
384
|
|
385 (define_split
|
|
386 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
|
|
387 (unspec:VEC_ALLREG_MODE
|
|
388 [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
|
|
389 UNSPEC_SGPRBASE))
|
|
390 (clobber (match_scratch:V64DI 2))]
|
|
391 ""
|
|
392 [(set (mem:BLK (scratch))
|
|
393 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
|
|
394 UNSPEC_SCATTER))]
|
|
395 {
|
|
396 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
|
|
397 operands[0],
|
|
398 operands[2]);
|
|
399 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
|
|
400 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
|
|
401 })
|
|
402
|
|
403 (define_split
|
|
404 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
|
|
405 (vec_merge:VEC_ALLREG_MODE
|
|
406 (match_operand:VEC_ALLREG_MODE 1 "general_operand")
|
|
407 (match_operand:VEC_ALLREG_MODE 2 "")
|
|
408 (match_operand:DI 3 "gcn_exec_reg_operand")))
|
|
409 (clobber (match_scratch:V64DI 4))]
|
|
410 ""
|
|
411 [(set (mem:BLK (scratch))
|
|
412 (unspec:BLK [(match_dup 5) (match_dup 1)
|
|
413 (match_dup 6) (match_dup 7) (match_dup 3)]
|
|
414 UNSPEC_SCATTER))]
|
|
415 {
|
|
416 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
|
|
417 operands[3],
|
|
418 operands[0],
|
|
419 operands[4]);
|
|
420 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
|
|
421 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
|
|
422 })
|
|
423
|
|
424 (define_split
|
|
425 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
|
|
426 (unspec:VEC_ALLREG_MODE
|
|
427 [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
|
|
428 UNSPEC_SGPRBASE))
|
|
429 (clobber (match_scratch:V64DI 2))]
|
|
430 ""
|
|
431 [(set (match_dup 0)
|
|
432 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
|
|
433 (mem:BLK (scratch))]
|
|
434 UNSPEC_GATHER))]
|
|
435 {
|
|
436 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
|
|
437 operands[1],
|
|
438 operands[2]);
|
|
439 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
|
|
440 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
|
|
441 })
|
|
442
|
|
443 (define_split
|
|
444 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
|
|
445 (vec_merge:VEC_ALLREG_MODE
|
|
446 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
|
|
447 (match_operand:VEC_ALLREG_MODE 2 "")
|
|
448 (match_operand:DI 3 "gcn_exec_reg_operand")))
|
|
449 (clobber (match_scratch:V64DI 4))]
|
|
450 ""
|
|
451 [(set (match_dup 0)
|
|
452 (vec_merge:VEC_ALLREG_MODE
|
|
453 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
|
|
454 (mem:BLK (scratch))]
|
|
455 UNSPEC_GATHER)
|
|
456 (match_dup 2)
|
|
457 (match_dup 3)))]
|
|
458 {
|
|
459 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
|
|
460 operands[3],
|
|
461 operands[1],
|
|
462 operands[4]);
|
|
463 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
|
|
464 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
|
|
465 })
|
|
466
|
|
467 ; TODO: Add zero/sign extending variants.
|
|
468
|
|
469 ;; }}}
|
|
470 ;; {{{ Lane moves
|
|
471
|
|
472 ; v_writelane and v_readlane work regardless of exec flags.
|
|
473 ; We allow source to be scratch.
|
|
474 ;
|
|
475 ; FIXME these should take A immediates
|
|
476
|
|
477 (define_insn "*vec_set<mode>"
|
|
478 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "= v")
|
|
479 (vec_merge:VEC_ALL1REG_MODE
|
|
480 (vec_duplicate:VEC_ALL1REG_MODE
|
|
481 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
|
|
482 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
|
|
483 " U0")
|
|
484 (ashift (const_int 1)
|
|
485 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
|
|
486 ""
|
|
487 "v_writelane_b32 %0, %1, %2"
|
|
488 [(set_attr "type" "vop3a")
|
|
489 (set_attr "length" "8")
|
|
490 (set_attr "exec" "none")
|
|
491 (set_attr "laneselect" "yes")])
|
|
492
|
|
493 ; FIXME: 64bit operations really should be splitters, but I am not sure how
|
|
494 ; to represent vertical subregs.
|
|
495 (define_insn "*vec_set<mode>"
|
|
496 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
|
|
497 (vec_merge:VEC_2REG_MODE
|
|
498 (vec_duplicate:VEC_2REG_MODE
|
|
499 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
|
|
500 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
|
|
501 " U0")
|
|
502 (ashift (const_int 1)
|
|
503 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
|
|
504 ""
|
|
505 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
|
|
506 [(set_attr "type" "vmult")
|
|
507 (set_attr "length" "16")
|
|
508 (set_attr "exec" "none")
|
|
509 (set_attr "laneselect" "yes")])
|
|
510
|
|
511 (define_expand "vec_set<mode>"
|
|
512 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
513 (vec_merge:VEC_ALLREG_MODE
|
|
514 (vec_duplicate:VEC_ALLREG_MODE
|
|
515 (match_operand:<SCALAR_MODE> 1 "register_operand"))
|
|
516 (match_dup 0)
|
|
517 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
|
|
518 "")
|
|
519
|
|
520 (define_insn "*vec_set<mode>_1"
|
|
521 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
|
|
522 (vec_merge:VEC_ALL1REG_MODE
|
|
523 (vec_duplicate:VEC_ALL1REG_MODE
|
|
524 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
|
|
525 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
|
|
526 "U0")
|
|
527 (match_operand:SI 2 "const_int_operand" " i")))]
|
|
528 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
|
|
529 {
|
|
530 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
|
|
531 return "v_writelane_b32 %0, %1, %2";
|
|
532 }
|
|
533 [(set_attr "type" "vop3a")
|
|
534 (set_attr "length" "8")
|
|
535 (set_attr "exec" "none")
|
|
536 (set_attr "laneselect" "yes")])
|
|
537
|
|
538 (define_insn "*vec_set<mode>_1"
|
|
539 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
|
|
540 (vec_merge:VEC_2REG_MODE
|
|
541 (vec_duplicate:VEC_2REG_MODE
|
|
542 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
|
|
543 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
|
|
544 "U0")
|
|
545 (match_operand:SI 2 "const_int_operand" " i")))]
|
|
546 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
|
|
547 {
|
|
548 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
|
|
549 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
|
|
550 }
|
|
551 [(set_attr "type" "vmult")
|
|
552 (set_attr "length" "16")
|
|
553 (set_attr "exec" "none")
|
|
554 (set_attr "laneselect" "yes")])
|
|
555
|
|
556 (define_insn "vec_duplicate<mode><exec>"
|
|
557 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
|
|
558 (vec_duplicate:VEC_ALL1REG_MODE
|
|
559 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
|
|
560 ""
|
|
561 "v_mov_b32\t%0, %1"
|
|
562 [(set_attr "type" "vop3a")
|
|
563 (set_attr "length" "8")])
|
|
564
|
|
565 (define_insn "vec_duplicate<mode><exec>"
|
|
566 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
|
|
567 (vec_duplicate:VEC_2REG_MODE
|
|
568 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
|
|
569 ""
|
|
570 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
|
|
571 [(set_attr "type" "vop3a")
|
|
572 (set_attr "length" "16")])
|
|
573
|
|
574 (define_insn "vec_extract<mode><scalar_mode>"
|
|
575 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
|
|
576 (vec_select:<SCALAR_MODE>
|
|
577 (match_operand:VEC_ALL1REG_MODE 1 "register_operand" " v")
|
|
578 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
|
|
579 ""
|
|
580 "v_readlane_b32 %0, %1, %2"
|
|
581 [(set_attr "type" "vop3a")
|
|
582 (set_attr "length" "8")
|
|
583 (set_attr "exec" "none")
|
|
584 (set_attr "laneselect" "yes")])
|
|
585
|
|
586 (define_insn "vec_extract<mode><scalar_mode>"
|
|
587 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
|
|
588 (vec_select:<SCALAR_MODE>
|
|
589 (match_operand:VEC_2REG_MODE 1 "register_operand" " v")
|
|
590 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
|
|
591 ""
|
|
592 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
|
|
593 [(set_attr "type" "vmult")
|
|
594 (set_attr "length" "16")
|
|
595 (set_attr "exec" "none")
|
|
596 (set_attr "laneselect" "yes")])
|
|
597
|
|
598 (define_expand "extract_last_<mode>"
|
|
599 [(match_operand:<SCALAR_MODE> 0 "register_operand")
|
|
600 (match_operand:DI 1 "gcn_alu_operand")
|
|
601 (match_operand:VEC_ALLREG_MODE 2 "register_operand")]
|
|
602 "can_create_pseudo_p ()"
|
|
603 {
|
|
604 rtx dst = operands[0];
|
|
605 rtx mask = operands[1];
|
|
606 rtx vect = operands[2];
|
|
607 rtx tmpreg = gen_reg_rtx (SImode);
|
|
608
|
|
609 emit_insn (gen_clzdi2 (tmpreg, mask));
|
|
610 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
|
|
611 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
|
|
612 DONE;
|
|
613 })
|
|
614
|
|
615 (define_expand "fold_extract_last_<mode>"
|
|
616 [(match_operand:<SCALAR_MODE> 0 "register_operand")
|
|
617 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
|
|
618 (match_operand:DI 2 "gcn_alu_operand")
|
|
619 (match_operand:VEC_ALLREG_MODE 3 "register_operand")]
|
|
620 "can_create_pseudo_p ()"
|
|
621 {
|
|
622 rtx dst = operands[0];
|
|
623 rtx default_value = operands[1];
|
|
624 rtx mask = operands[2];
|
|
625 rtx vect = operands[3];
|
|
626 rtx else_label = gen_label_rtx ();
|
|
627 rtx end_label = gen_label_rtx ();
|
|
628
|
|
629 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
|
|
630 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
|
|
631 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
|
|
632 emit_jump_insn (gen_jump (end_label));
|
|
633 emit_barrier ();
|
|
634 emit_label (else_label);
|
|
635 emit_move_insn (dst, default_value);
|
|
636 emit_label (end_label);
|
|
637 DONE;
|
|
638 })
|
|
639
|
|
640 (define_expand "vec_init<mode><scalar_mode>"
|
|
641 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
642 (match_operand 1)]
|
|
643 ""
|
|
644 {
|
|
645 gcn_expand_vector_init (operands[0], operands[1]);
|
|
646 DONE;
|
|
647 })
|
|
648
|
|
649 ;; }}}
|
|
650 ;; {{{ Scatter / Gather
|
|
651
|
|
652 ;; GCN does not have an instruction for loading a vector from contiguous
|
|
653 ;; memory so *all* loads and stores are eventually converted to scatter
|
|
654 ;; or gather.
|
|
655 ;;
|
|
656 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
|
|
657 ;; unspec. The unspec formats are as follows:
|
|
658 ;;
|
|
659 ;; (unspec:V64??
|
|
660 ;; [(<address expression>)
|
|
661 ;; (<addr_space_t>)
|
|
662 ;; (<use_glc>)
|
|
663 ;; (mem:BLK (scratch))]
|
|
664 ;; UNSPEC_GATHER)
|
|
665 ;;
|
|
666 ;; (unspec:BLK
|
|
667 ;; [(<address expression>)
|
|
668 ;; (<source register>)
|
|
669 ;; (<addr_space_t>)
|
|
670 ;; (<use_glc>)
|
|
671 ;; (<exec>)]
|
|
672 ;; UNSPEC_SCATTER)
|
|
673 ;;
|
|
674 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
|
|
675 ;; - The mem:BLK does not contain any real information, but indicates that an
|
|
676 ;; unknown memory read is taking place. Stores are expected to use a similar
|
|
677 ;; mem:BLK outside the unspec.
|
|
678 ;; - The address space and glc (volatile) fields are there to replace the
|
|
679 ;; fields normally found in a MEM.
|
|
680 ;; - Multiple forms of address expression are supported, below.
|
|
681
|
|
682 (define_expand "gather_load<mode>"
|
|
683 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
684 (match_operand:DI 1 "register_operand")
|
|
685 (match_operand 2 "register_operand")
|
|
686 (match_operand 3 "immediate_operand")
|
|
687 (match_operand:SI 4 "gcn_alu_operand")]
|
|
688 ""
|
|
689 {
|
|
690 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
|
|
691 operands[2], operands[4],
|
|
692 INTVAL (operands[3]), NULL);
|
|
693
|
|
694 if (GET_MODE (addr) == V64DImode)
|
|
695 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
|
|
696 const0_rtx, const0_rtx));
|
|
697 else
|
|
698 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
|
|
699 addr, const0_rtx, const0_rtx,
|
|
700 const0_rtx));
|
|
701 DONE;
|
|
702 })
|
|
703
|
|
704 ; Allow any address expression
|
|
705 (define_expand "gather<mode>_expr<exec>"
|
|
706 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
707 (unspec:VEC_ALLREG_MODE
|
|
708 [(match_operand 1 "")
|
|
709 (match_operand 2 "immediate_operand")
|
|
710 (match_operand 3 "immediate_operand")
|
|
711 (mem:BLK (scratch))]
|
|
712 UNSPEC_GATHER))]
|
|
713 ""
|
|
714 {})
|
|
715
|
|
716 (define_insn "gather<mode>_insn_1offset<exec>"
|
|
717 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
|
|
718 (unspec:VEC_ALLREG_MODE
|
|
719 [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
|
|
720 (vec_duplicate:V64DI
|
|
721 (match_operand 2 "immediate_operand" " n")))
|
|
722 (match_operand 3 "immediate_operand" " n")
|
|
723 (match_operand 4 "immediate_operand" " n")
|
|
724 (mem:BLK (scratch))]
|
|
725 UNSPEC_GATHER))]
|
|
726 "(AS_FLAT_P (INTVAL (operands[3]))
|
|
727 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
|
|
728 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
|
|
729 || (AS_GLOBAL_P (INTVAL (operands[3]))
|
|
730 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
|
|
731 {
|
|
732 addr_space_t as = INTVAL (operands[3]);
|
|
733 const char *glc = INTVAL (operands[4]) ? " glc" : "";
|
|
734
|
|
735 static char buf[200];
|
|
736 if (AS_FLAT_P (as))
|
|
737 {
|
|
738 if (TARGET_GCN5_PLUS)
|
|
739 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
|
|
740 glc);
|
|
741 else
|
|
742 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
|
|
743 }
|
|
744 else if (AS_GLOBAL_P (as))
|
|
745 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
|
|
746 "s_waitcnt\tvmcnt(0)", glc);
|
|
747 else
|
|
748 gcc_unreachable ();
|
|
749
|
|
750 return buf;
|
|
751 }
|
|
752 [(set_attr "type" "flat")
|
|
753 (set_attr "length" "12")])
|
|
754
|
|
755 (define_insn "gather<mode>_insn_1offset_ds<exec>"
|
|
756 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
|
|
757 (unspec:VEC_ALLREG_MODE
|
|
758 [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
|
|
759 (vec_duplicate:V64SI
|
|
760 (match_operand 2 "immediate_operand" " n")))
|
|
761 (match_operand 3 "immediate_operand" " n")
|
|
762 (match_operand 4 "immediate_operand" " n")
|
|
763 (mem:BLK (scratch))]
|
|
764 UNSPEC_GATHER))]
|
|
765 "(AS_ANY_DS_P (INTVAL (operands[3]))
|
|
766 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
|
|
767 {
|
|
768 addr_space_t as = INTVAL (operands[3]);
|
|
769 static char buf[200];
|
|
770 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
|
|
771 (AS_GDS_P (as) ? " gds" : ""));
|
|
772 return buf;
|
|
773 }
|
|
774 [(set_attr "type" "ds")
|
|
775 (set_attr "length" "12")])
|
|
776
|
|
777 (define_insn "gather<mode>_insn_2offsets<exec>"
|
|
778 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
|
|
779 (unspec:VEC_ALLREG_MODE
|
|
780 [(plus:V64DI
|
|
781 (plus:V64DI
|
|
782 (vec_duplicate:V64DI
|
|
783 (match_operand:DI 1 "register_operand" "Sv"))
|
|
784 (sign_extend:V64DI
|
|
785 (match_operand:V64SI 2 "register_operand" " v")))
|
|
786 (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
|
|
787 (match_operand 4 "immediate_operand" " n")
|
|
788 (match_operand 5 "immediate_operand" " n")
|
|
789 (mem:BLK (scratch))]
|
|
790 UNSPEC_GATHER))]
|
|
791 "(AS_GLOBAL_P (INTVAL (operands[4]))
|
|
792 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
|
|
793 {
|
|
794 addr_space_t as = INTVAL (operands[4]);
|
|
795 const char *glc = INTVAL (operands[5]) ? " glc" : "";
|
|
796
|
|
797 static char buf[200];
|
|
798 if (AS_GLOBAL_P (as))
|
|
799 {
|
|
800 /* Work around assembler bug in which a 64-bit register is expected,
|
|
801 but a 32-bit value would be correct. */
|
|
802 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
|
|
803 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
|
|
804 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
|
|
805 }
|
|
806 else
|
|
807 gcc_unreachable ();
|
|
808
|
|
809 return buf;
|
|
810 }
|
|
811 [(set_attr "type" "flat")
|
|
812 (set_attr "length" "12")])
|
|
813
|
|
814 (define_expand "scatter_store<mode>"
|
|
815 [(match_operand:DI 0 "register_operand")
|
|
816 (match_operand 1 "register_operand")
|
|
817 (match_operand 2 "immediate_operand")
|
|
818 (match_operand:SI 3 "gcn_alu_operand")
|
|
819 (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
|
|
820 ""
|
|
821 {
|
|
822 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
|
|
823 operands[1], operands[3],
|
|
824 INTVAL (operands[2]), NULL);
|
|
825
|
|
826 if (GET_MODE (addr) == V64DImode)
|
|
827 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
|
|
828 const0_rtx, const0_rtx));
|
|
829 else
|
|
830 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
|
|
831 const0_rtx, operands[4],
|
|
832 const0_rtx, const0_rtx));
|
|
833 DONE;
|
|
834 })
|
|
835
|
|
836 (define_expand "scatter<mode>_exec"
|
|
837 [(match_operand:DI 0 "register_operand")
|
|
838 (match_operand 1 "register_operand")
|
|
839 (match_operand 2 "immediate_operand")
|
|
840 (match_operand:SI 3 "gcn_alu_operand")
|
|
841 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
|
|
842 (match_operand:DI 5 "gcn_exec_reg_operand")]
|
|
843 ""
|
|
844 {
|
|
845 operands[5] = force_reg (DImode, operands[5]);
|
|
846
|
|
847 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
|
|
848 operands[1], operands[3],
|
|
849 INTVAL (operands[2]), operands[5]);
|
|
850
|
|
851 if (GET_MODE (addr) == V64DImode)
|
|
852 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
|
|
853 operands[4], const0_rtx,
|
|
854 const0_rtx,
|
|
855 operands[5]));
|
|
856 else
|
|
857 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
|
|
858 const0_rtx, operands[4],
|
|
859 const0_rtx, const0_rtx,
|
|
860 operands[5]));
|
|
861 DONE;
|
|
862 })
|
|
863
|
|
864 ; Allow any address expression
|
|
865 (define_expand "scatter<mode>_expr<exec_scatter>"
|
|
866 [(set (mem:BLK (scratch))
|
|
867 (unspec:BLK
|
|
868 [(match_operand:V64DI 0 "")
|
|
869 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
|
|
870 (match_operand 2 "immediate_operand")
|
|
871 (match_operand 3 "immediate_operand")]
|
|
872 UNSPEC_SCATTER))]
|
|
873 ""
|
|
874 {})
|
|
875
|
|
876 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
|
|
877 [(set (mem:BLK (scratch))
|
|
878 (unspec:BLK
|
|
879 [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
|
|
880 (vec_duplicate:V64DI
|
|
881 (match_operand 1 "immediate_operand" "n")))
|
|
882 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
|
|
883 (match_operand 3 "immediate_operand" "n")
|
|
884 (match_operand 4 "immediate_operand" "n")]
|
|
885 UNSPEC_SCATTER))]
|
|
886 "(AS_FLAT_P (INTVAL (operands[3]))
|
|
887 && (INTVAL(operands[1]) == 0
|
|
888 || (TARGET_GCN5_PLUS
|
|
889 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
|
|
890 || (AS_GLOBAL_P (INTVAL (operands[3]))
|
|
891 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
|
|
892 {
|
|
893 addr_space_t as = INTVAL (operands[3]);
|
|
894 const char *glc = INTVAL (operands[4]) ? " glc" : "";
|
|
895
|
|
896 static char buf[200];
|
|
897 if (AS_FLAT_P (as))
|
|
898 {
|
|
899 if (TARGET_GCN5_PLUS)
|
|
900 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
|
|
901 else
|
|
902 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
|
|
903 }
|
|
904 else if (AS_GLOBAL_P (as))
|
|
905 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
|
|
906 else
|
|
907 gcc_unreachable ();
|
|
908
|
|
909 return buf;
|
|
910 }
|
|
911 [(set_attr "type" "flat")
|
|
912 (set_attr "length" "12")])
|
|
913
|
|
914 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
|
|
915 [(set (mem:BLK (scratch))
|
|
916 (unspec:BLK
|
|
917 [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
|
|
918 (vec_duplicate:V64SI
|
|
919 (match_operand 1 "immediate_operand" "n")))
|
|
920 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
|
|
921 (match_operand 3 "immediate_operand" "n")
|
|
922 (match_operand 4 "immediate_operand" "n")]
|
|
923 UNSPEC_SCATTER))]
|
|
924 "(AS_ANY_DS_P (INTVAL (operands[3]))
|
|
925 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
|
|
926 {
|
|
927 addr_space_t as = INTVAL (operands[3]);
|
|
928 static char buf[200];
|
|
929 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
|
|
930 (AS_GDS_P (as) ? " gds" : ""));
|
|
931 return buf;
|
|
932 }
|
|
933 [(set_attr "type" "ds")
|
|
934 (set_attr "length" "12")])
|
|
935
|
|
936 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
|
|
937 [(set (mem:BLK (scratch))
|
|
938 (unspec:BLK
|
|
939 [(plus:V64DI
|
|
940 (plus:V64DI
|
|
941 (vec_duplicate:V64DI
|
|
942 (match_operand:DI 0 "register_operand" "Sv"))
|
|
943 (sign_extend:V64DI
|
|
944 (match_operand:V64SI 1 "register_operand" " v")))
|
|
945 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
|
|
946 " n")))
|
|
947 (match_operand:VEC_ALLREG_MODE 3 "register_operand" " v")
|
|
948 (match_operand 4 "immediate_operand" " n")
|
|
949 (match_operand 5 "immediate_operand" " n")]
|
|
950 UNSPEC_SCATTER))]
|
|
951 "(AS_GLOBAL_P (INTVAL (operands[4]))
|
|
952 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
|
|
953 {
|
|
954 addr_space_t as = INTVAL (operands[4]);
|
|
955 const char *glc = INTVAL (operands[5]) ? " glc" : "";
|
|
956
|
|
957 static char buf[200];
|
|
958 if (AS_GLOBAL_P (as))
|
|
959 {
|
|
960 /* Work around assembler bug in which a 64-bit register is expected,
|
|
961 but a 32-bit value would be correct. */
|
|
962 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
|
|
963 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
|
|
964 reg, reg + 1, glc);
|
|
965 }
|
|
966 else
|
|
967 gcc_unreachable ();
|
|
968
|
|
969 return buf;
|
|
970 }
|
|
971 [(set_attr "type" "flat")
|
|
972 (set_attr "length" "12")])
|
|
973
|
|
974 ;; }}}
|
|
975 ;; {{{ Permutations
|
|
976
|
|
977 (define_insn "ds_bpermute<mode>"
|
|
978 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
|
|
979 (unspec:VEC_ALL1REG_MODE
|
|
980 [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
|
|
981 (match_operand:V64SI 1 "register_operand" " v")
|
|
982 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
|
|
983 UNSPEC_BPERMUTE))]
|
|
984 ""
|
|
985 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
|
|
986 [(set_attr "type" "vop2")
|
|
987 (set_attr "length" "12")])
|
|
988
|
|
989 (define_insn_and_split "ds_bpermute<mode>"
|
|
990 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
|
|
991 (unspec:VEC_2REG_MODE
|
|
992 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
|
|
993 (match_operand:V64SI 1 "register_operand" " v")
|
|
994 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
|
|
995 UNSPEC_BPERMUTE))]
|
|
996 ""
|
|
997 "#"
|
|
998 "reload_completed"
|
|
999 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
|
|
1000 UNSPEC_BPERMUTE))
|
|
1001 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
|
|
1002 UNSPEC_BPERMUTE))]
|
|
1003 {
|
|
1004 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
|
|
1005 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
|
|
1006 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
|
|
1007 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
|
|
1008 }
|
|
1009 [(set_attr "type" "vmult")
|
|
1010 (set_attr "length" "24")])
|
|
1011
|
|
1012 ;; }}}
|
|
1013 ;; {{{ ALU special case: add/sub
|
|
1014
|
|
1015 (define_insn "add<mode>3<exec_clobber>"
|
|
1016 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
|
|
1017 (plus:VEC_ALL1REG_INT_MODE
|
|
1018 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "% v")
|
|
1019 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" "vSvB")))
|
|
1020 (clobber (reg:DI VCC_REG))]
|
|
1021 ""
|
|
1022 "v_add%^_u32\t%0, vcc, %2, %1"
|
|
1023 [(set_attr "type" "vop2")
|
|
1024 (set_attr "length" "8")])
|
|
1025
|
|
1026 (define_insn "add<mode>3_dup<exec_clobber>"
|
|
1027 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
|
|
1028 (plus:VEC_ALL1REG_INT_MODE
|
|
1029 (vec_duplicate:VEC_ALL1REG_INT_MODE
|
|
1030 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
|
|
1031 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" " v")))
|
|
1032 (clobber (reg:DI VCC_REG))]
|
|
1033 ""
|
|
1034 "v_add%^_u32\t%0, vcc, %2, %1"
|
|
1035 [(set_attr "type" "vop2")
|
|
1036 (set_attr "length" "8")])
|
|
1037
|
|
1038 (define_insn "addv64si3_vcc<exec_vcc>"
|
|
1039 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
|
|
1040 (plus:V64SI
|
|
1041 (match_operand:V64SI 1 "register_operand" "% v, v")
|
|
1042 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
|
|
1043 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
|
|
1044 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
|
|
1045 (match_dup 1)))]
|
|
1046 ""
|
|
1047 "v_add%^_u32\t%0, %3, %2, %1"
|
|
1048 [(set_attr "type" "vop2,vop3b")
|
|
1049 (set_attr "length" "8")])
|
|
1050
|
|
1051 ; This pattern only changes the VCC bits when the corresponding lane is
|
|
1052 ; enabled, so the set must be described as an ior.
|
|
1053
|
|
1054 (define_insn "addv64si3_vcc_dup<exec_vcc>"
|
|
1055 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
|
|
1056 (plus:V64SI
|
|
1057 (vec_duplicate:V64SI
|
|
1058 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
|
|
1059 (match_operand:V64SI 2 "register_operand" " v, v")))
|
|
1060 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
|
|
1061 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
|
|
1062 (match_dup 1))
|
|
1063 (vec_duplicate:V64SI (match_dup 2))))]
|
|
1064 ""
|
|
1065 "v_add%^_u32\t%0, %3, %2, %1"
|
|
1066 [(set_attr "type" "vop2,vop3b")
|
|
1067 (set_attr "length" "8,8")])
|
|
1068
|
|
1069 ; v_addc does not accept an SGPR because the VCC read already counts as an
|
|
1070 ; SGPR use and the number of SGPR operands is limited to 1. It does not
|
|
1071 ; accept "B" immediate constants due to a related bus conflict.
|
|
1072
|
|
1073 (define_insn "addcv64si3<exec_vcc>"
|
|
1074 [(set (match_operand:V64SI 0 "register_operand" "=v, v")
|
|
1075 (plus:V64SI
|
|
1076 (plus:V64SI
|
|
1077 (vec_merge:V64SI
|
|
1078 (vec_duplicate:V64SI (const_int 1))
|
|
1079 (vec_duplicate:V64SI (const_int 0))
|
|
1080 (match_operand:DI 3 "register_operand" " cV,cVSv"))
|
|
1081 (match_operand:V64SI 1 "gcn_alu_operand" "% v, vA"))
|
|
1082 (match_operand:V64SI 2 "gcn_alu_operand" " vA, vA")))
|
|
1083 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
|
|
1084 (ior:DI (ltu:DI (plus:V64SI
|
|
1085 (plus:V64SI
|
|
1086 (vec_merge:V64SI
|
|
1087 (vec_duplicate:V64SI (const_int 1))
|
|
1088 (vec_duplicate:V64SI (const_int 0))
|
|
1089 (match_dup 3))
|
|
1090 (match_dup 1))
|
|
1091 (match_dup 2))
|
|
1092 (match_dup 2))
|
|
1093 (ltu:DI (plus:V64SI
|
|
1094 (vec_merge:V64SI
|
|
1095 (vec_duplicate:V64SI (const_int 1))
|
|
1096 (vec_duplicate:V64SI (const_int 0))
|
|
1097 (match_dup 3))
|
|
1098 (match_dup 1))
|
|
1099 (match_dup 1))))]
|
|
1100 ""
|
|
1101 "v_addc%^_u32\t%0, %4, %2, %1, %3"
|
|
1102 [(set_attr "type" "vop2,vop3b")
|
|
1103 (set_attr "length" "4,8")])
|
|
1104
|
|
1105 (define_insn "sub<mode>3<exec_clobber>"
|
|
1106 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v, v")
|
|
1107 (minus:VEC_ALL1REG_INT_MODE
|
|
1108 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "vSvB, v")
|
|
1109 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " v,vSvB")))
|
|
1110 (clobber (reg:DI VCC_REG))]
|
|
1111 ""
|
|
1112 "@
|
|
1113 v_sub%^_u32\t%0, vcc, %1, %2
|
|
1114 v_subrev%^_u32\t%0, vcc, %2, %1"
|
|
1115 [(set_attr "type" "vop2")
|
|
1116 (set_attr "length" "8,8")])
|
|
1117
|
|
1118 (define_insn "subv64si3_vcc<exec_vcc>"
|
|
1119 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
|
|
1120 (minus:V64SI
|
|
1121 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
|
|
1122 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
|
|
1123 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
|
|
1124 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
|
|
1125 (match_dup 1)))]
|
|
1126 ""
|
|
1127 "@
|
|
1128 v_sub%^_u32\t%0, %3, %1, %2
|
|
1129 v_sub%^_u32\t%0, %3, %1, %2
|
|
1130 v_subrev%^_u32\t%0, %3, %2, %1
|
|
1131 v_subrev%^_u32\t%0, %3, %2, %1"
|
|
1132 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
|
|
1133 (set_attr "length" "8")])
|
|
1134
|
|
1135 ; v_subb does not accept an SGPR because the VCC read already counts as an
|
|
1136 ; SGPR use and the number of SGPR operands is limited to 1. It does not
|
|
1137 ; accept "B" immediate constants due to a related bus conflict.
|
|
1138
|
|
1139 (define_insn "subcv64si3<exec_vcc>"
|
|
1140 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
|
|
1141 (minus:V64SI
|
|
1142 (minus:V64SI
|
|
1143 (vec_merge:V64SI
|
|
1144 (vec_duplicate:V64SI (const_int 1))
|
|
1145 (vec_duplicate:V64SI (const_int 0))
|
|
1146 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
|
|
1147 (match_operand:V64SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
|
|
1148 (match_operand:V64SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
|
|
1149 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
|
|
1150 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
|
|
1151 (vec_merge:V64SI
|
|
1152 (vec_duplicate:V64SI (const_int 1))
|
|
1153 (vec_duplicate:V64SI (const_int 0))
|
|
1154 (match_dup 3))
|
|
1155 (match_dup 1))
|
|
1156 (match_dup 2))
|
|
1157 (match_dup 2))
|
|
1158 (ltu:DI (minus:V64SI (vec_merge:V64SI
|
|
1159 (vec_duplicate:V64SI (const_int 1))
|
|
1160 (vec_duplicate:V64SI (const_int 0))
|
|
1161 (match_dup 3))
|
|
1162 (match_dup 1))
|
|
1163 (match_dup 1))))]
|
|
1164 ""
|
|
1165 "@
|
|
1166 v_subb%^_u32\t%0, %4, %1, %2, %3
|
|
1167 v_subb%^_u32\t%0, %4, %1, %2, %3
|
|
1168 v_subbrev%^_u32\t%0, %4, %2, %1, %3
|
|
1169 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
|
|
1170 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
|
|
1171 (set_attr "length" "4,8,4,8")])
|
|
1172
|
|
1173 (define_insn_and_split "addv64di3"
|
|
1174 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
|
|
1175 (plus:V64DI
|
|
1176 (match_operand:V64DI 1 "register_operand" "%vDb,vDb0")
|
|
1177 (match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb")))
|
|
1178 (clobber (reg:DI VCC_REG))]
|
|
1179 ""
|
|
1180 "#"
|
|
1181 "gcn_can_split_p (V64DImode, operands[0])
|
|
1182 && gcn_can_split_p (V64DImode, operands[1])
|
|
1183 && gcn_can_split_p (V64DImode, operands[2])"
|
|
1184 [(const_int 0)]
|
|
1185 {
|
|
1186 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1187 emit_insn (gen_addv64si3_vcc
|
|
1188 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1189 gcn_operand_part (V64DImode, operands[1], 0),
|
|
1190 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1191 vcc));
|
|
1192 emit_insn (gen_addcv64si3
|
|
1193 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1194 gcn_operand_part (V64DImode, operands[1], 1),
|
|
1195 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1196 vcc, vcc));
|
|
1197 DONE;
|
|
1198 }
|
|
1199 [(set_attr "type" "vmult")
|
|
1200 (set_attr "length" "8")])
|
|
1201
|
|
1202 (define_insn_and_split "addv64di3_exec"
|
|
1203 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v")
|
|
1204 (vec_merge:V64DI
|
|
1205 (plus:V64DI
|
|
1206 (match_operand:V64DI 1 "register_operand" "%vDb,vDb0,vDb")
|
|
1207 (match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb,vDb"))
|
|
1208 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
|
|
1209 " U, U, 0")
|
|
1210 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e")))
|
|
1211 (clobber (reg:DI VCC_REG))]
|
|
1212 ""
|
|
1213 "#"
|
|
1214 "gcn_can_split_p (V64DImode, operands[0])
|
|
1215 && gcn_can_split_p (V64DImode, operands[1])
|
|
1216 && gcn_can_split_p (V64DImode, operands[2])
|
|
1217 && gcn_can_split_p (V64DImode, operands[4])"
|
|
1218 [(const_int 0)]
|
|
1219 {
|
|
1220 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1221 emit_insn (gen_addv64si3_vcc_exec
|
|
1222 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1223 gcn_operand_part (V64DImode, operands[1], 0),
|
|
1224 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1225 vcc,
|
|
1226 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1227 operands[4]));
|
|
1228 emit_insn (gen_addcv64si3_exec
|
|
1229 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1230 gcn_operand_part (V64DImode, operands[1], 1),
|
|
1231 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1232 vcc, vcc,
|
|
1233 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1234 operands[4]));
|
|
1235 DONE;
|
|
1236 }
|
|
1237 [(set_attr "type" "vmult")
|
|
1238 (set_attr "length" "8")])
|
|
1239
|
|
1240 (define_insn_and_split "subv64di3"
|
|
1241 [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
|
|
1242 (minus:V64DI
|
|
1243 (match_operand:V64DI 1 "gcn_alu_operand" "vDb,vDb0, v, v0")
|
|
1244 (match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vDb0,vDb")))
|
|
1245 (clobber (reg:DI VCC_REG))]
|
|
1246 ""
|
|
1247 "#"
|
|
1248 "gcn_can_split_p (V64DImode, operands[0])
|
|
1249 && gcn_can_split_p (V64DImode, operands[1])
|
|
1250 && gcn_can_split_p (V64DImode, operands[2])"
|
|
1251 [(const_int 0)]
|
|
1252 {
|
|
1253 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1254 emit_insn (gen_subv64si3_vcc
|
|
1255 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1256 gcn_operand_part (V64DImode, operands[1], 0),
|
|
1257 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1258 vcc));
|
|
1259 emit_insn (gen_subcv64si3
|
|
1260 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1261 gcn_operand_part (V64DImode, operands[1], 1),
|
|
1262 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1263 vcc, vcc));
|
|
1264 DONE;
|
|
1265 }
|
|
1266 [(set_attr "type" "vmult")
|
|
1267 (set_attr "length" "8")])
|
|
1268
|
|
1269 (define_insn_and_split "subv64di3_exec"
|
|
1270 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v, &v")
|
|
1271 (vec_merge:V64DI
|
|
1272 (minus:V64DI
|
|
1273 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB,vSvB0, v, v0")
|
|
1274 (match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vSvB0,vSvB"))
|
|
1275 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
|
|
1276 " U0, U0, U0, U0")
|
|
1277 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
|
|
1278 (clobber (reg:DI VCC_REG))]
|
|
1279 "register_operand (operands[1], VOIDmode)
|
|
1280 || register_operand (operands[2], VOIDmode)"
|
|
1281 "#"
|
|
1282 "gcn_can_split_p (V64DImode, operands[0])
|
|
1283 && gcn_can_split_p (V64DImode, operands[1])
|
|
1284 && gcn_can_split_p (V64DImode, operands[2])
|
|
1285 && gcn_can_split_p (V64DImode, operands[3])"
|
|
1286 [(const_int 0)]
|
|
1287 {
|
|
1288 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1289 emit_insn (gen_subv64si3_vcc_exec
|
|
1290 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1291 gcn_operand_part (V64DImode, operands[1], 0),
|
|
1292 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1293 vcc,
|
|
1294 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1295 operands[4]));
|
|
1296 emit_insn (gen_subcv64si3_exec
|
|
1297 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1298 gcn_operand_part (V64DImode, operands[1], 1),
|
|
1299 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1300 vcc, vcc,
|
|
1301 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1302 operands[4]));
|
|
1303 DONE;
|
|
1304 }
|
|
1305 [(set_attr "type" "vmult")
|
|
1306 (set_attr "length" "8")])
|
|
1307
|
|
1308 (define_insn_and_split "addv64di3_zext"
|
|
1309 [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
|
|
1310 (plus:V64DI
|
|
1311 (zero_extend:V64DI
|
|
1312 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB, vA, vB"))
|
|
1313 (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA,0vDb,0vDA")))
|
|
1314 (clobber (reg:DI VCC_REG))]
|
|
1315 ""
|
|
1316 "#"
|
|
1317 "gcn_can_split_p (V64DImode, operands[0])
|
|
1318 && gcn_can_split_p (V64DImode, operands[2])"
|
|
1319 [(const_int 0)]
|
|
1320 {
|
|
1321 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1322 emit_insn (gen_addv64si3_vcc
|
|
1323 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1324 operands[1],
|
|
1325 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1326 vcc));
|
|
1327 emit_insn (gen_addcv64si3
|
|
1328 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1329 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1330 const0_rtx, vcc, vcc));
|
|
1331 DONE;
|
|
1332 }
|
|
1333 [(set_attr "type" "vmult")
|
|
1334 (set_attr "length" "8")])
|
|
1335
|
|
1336 (define_insn_and_split "addv64di3_zext_exec"
|
|
1337 [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
|
|
1338 (vec_merge:V64DI
|
|
1339 (plus:V64DI
|
|
1340 (zero_extend:V64DI
|
|
1341 (match_operand:V64SI 1 "gcn_alu_operand" "0vA, vA,0vB, vB"))
|
|
1342 (match_operand:V64DI 2 "gcn_alu_operand" "vDb,0vDb,vDA,0vDA"))
|
|
1343 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
|
|
1344 " U0, U0, U0, U0")
|
|
1345 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
|
|
1346 (clobber (reg:DI VCC_REG))]
|
|
1347 ""
|
|
1348 "#"
|
|
1349 "gcn_can_split_p (V64DImode, operands[0])
|
|
1350 && gcn_can_split_p (V64DImode, operands[2])
|
|
1351 && gcn_can_split_p (V64DImode, operands[3])"
|
|
1352 [(const_int 0)]
|
|
1353 {
|
|
1354 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1355 emit_insn (gen_addv64si3_vcc_exec
|
|
1356 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1357 operands[1],
|
|
1358 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1359 vcc,
|
|
1360 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1361 operands[4]));
|
|
1362 emit_insn (gen_addcv64si3_exec
|
|
1363 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1364 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1365 const0_rtx, vcc, vcc,
|
|
1366 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1367 operands[4]));
|
|
1368 DONE;
|
|
1369 }
|
|
1370 [(set_attr "type" "vmult")
|
|
1371 (set_attr "length" "8")])
|
|
1372
|
|
1373 (define_insn_and_split "addv64di3_zext_dup"
|
|
1374 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
|
|
1375 (plus:V64DI
|
|
1376 (zero_extend:V64DI
|
|
1377 (vec_duplicate:V64SI
|
|
1378 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
|
|
1379 (match_operand:V64DI 2 "gcn_alu_operand" "vDA0,vDb0")))
|
|
1380 (clobber (reg:DI VCC_REG))]
|
|
1381 ""
|
|
1382 "#"
|
|
1383 "gcn_can_split_p (V64DImode, operands[0])
|
|
1384 && gcn_can_split_p (V64DImode, operands[2])"
|
|
1385 [(const_int 0)]
|
|
1386 {
|
|
1387 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1388 emit_insn (gen_addv64si3_vcc_dup
|
|
1389 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1390 gcn_operand_part (DImode, operands[1], 0),
|
|
1391 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1392 vcc));
|
|
1393 emit_insn (gen_addcv64si3
|
|
1394 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1395 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1396 const0_rtx, vcc, vcc));
|
|
1397 DONE;
|
|
1398 }
|
|
1399 [(set_attr "type" "vmult")
|
|
1400 (set_attr "length" "8")])
|
|
1401
|
|
1402 (define_insn_and_split "addv64di3_zext_dup_exec"
|
|
1403 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
|
|
1404 (vec_merge:V64DI
|
|
1405 (plus:V64DI
|
|
1406 (zero_extend:V64DI
|
|
1407 (vec_duplicate:V64SI
|
|
1408 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
|
|
1409 (match_operand:V64DI 2 "gcn_alu_operand" "vDb0,vDA0"))
|
|
1410 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
|
|
1411 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
|
|
1412 (clobber (reg:DI VCC_REG))]
|
|
1413 ""
|
|
1414 "#"
|
|
1415 "gcn_can_split_p (V64DImode, operands[0])
|
|
1416 && gcn_can_split_p (V64DImode, operands[2])
|
|
1417 && gcn_can_split_p (V64DImode, operands[3])"
|
|
1418 [(const_int 0)]
|
|
1419 {
|
|
1420 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1421 emit_insn (gen_addv64si3_vcc_dup_exec
|
|
1422 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1423 gcn_operand_part (DImode, operands[1], 0),
|
|
1424 gcn_operand_part (V64DImode, operands[2], 0),
|
|
1425 vcc,
|
|
1426 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1427 operands[4]));
|
|
1428 emit_insn (gen_addcv64si3_exec
|
|
1429 (gcn_operand_part (V64DImode, operands[0], 1),
|
|
1430 gcn_operand_part (V64DImode, operands[2], 1),
|
|
1431 const0_rtx, vcc, vcc,
|
|
1432 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1433 operands[4]));
|
|
1434 DONE;
|
|
1435 }
|
|
1436 [(set_attr "type" "vmult")
|
|
1437 (set_attr "length" "8")])
|
|
1438
|
|
1439 (define_insn_and_split "addv64di3_zext_dup2"
|
|
1440 [(set (match_operand:V64DI 0 "register_operand" "= &v")
|
|
1441 (plus:V64DI
|
|
1442 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
|
|
1443 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
|
|
1444 (clobber (reg:DI VCC_REG))]
|
|
1445 ""
|
|
1446 "#"
|
|
1447 "gcn_can_split_p (V64DImode, operands[0])"
|
|
1448 [(const_int 0)]
|
|
1449 {
|
|
1450 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1451 emit_insn (gen_addv64si3_vcc_dup
|
|
1452 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1453 gcn_operand_part (DImode, operands[2], 0),
|
|
1454 operands[1],
|
|
1455 vcc));
|
|
1456 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1457 emit_insn (gen_vec_duplicatev64si
|
|
1458 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
|
|
1459 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
|
|
1460 DONE;
|
|
1461 }
|
|
1462 [(set_attr "type" "vmult")
|
|
1463 (set_attr "length" "8")])
|
|
1464
|
|
1465 (define_insn_and_split "addv64di3_zext_dup2_exec"
|
|
1466 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1467 (vec_merge:V64DI
|
|
1468 (plus:V64DI
|
|
1469 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
|
|
1470 " vA"))
|
|
1471 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
|
|
1472 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
|
|
1473 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
|
|
1474 (clobber (reg:DI VCC_REG))]
|
|
1475 ""
|
|
1476 "#"
|
|
1477 "gcn_can_split_p (V64DImode, operands[0])
|
|
1478 && gcn_can_split_p (V64DImode, operands[3])"
|
|
1479 [(const_int 0)]
|
|
1480 {
|
|
1481 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1482 emit_insn (gen_addv64si3_vcc_dup_exec
|
|
1483 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1484 gcn_operand_part (DImode, operands[2], 0),
|
|
1485 operands[1],
|
|
1486 vcc,
|
|
1487 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1488 operands[4]));
|
|
1489 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1490 emit_insn (gen_vec_duplicatev64si_exec
|
|
1491 (dsthi, gcn_operand_part (DImode, operands[2], 1),
|
|
1492 gcn_gen_undef (V64SImode), operands[4]));
|
|
1493 emit_insn (gen_addcv64si3_exec
|
|
1494 (dsthi, dsthi, const0_rtx, vcc, vcc,
|
|
1495 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1496 operands[4]));
|
|
1497 DONE;
|
|
1498 }
|
|
1499 [(set_attr "type" "vmult")
|
|
1500 (set_attr "length" "8")])
|
|
1501
|
|
1502 (define_insn_and_split "addv64di3_sext_dup2"
|
|
1503 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1504 (plus:V64DI
|
|
1505 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
|
|
1506 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
|
|
1507 (clobber (match_scratch:V64SI 3 "=&v"))
|
|
1508 (clobber (reg:DI VCC_REG))]
|
|
1509 ""
|
|
1510 "#"
|
|
1511 "gcn_can_split_p (V64DImode, operands[0])"
|
|
1512 [(const_int 0)]
|
|
1513 {
|
|
1514 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1515 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
|
|
1516 emit_insn (gen_addv64si3_vcc_dup
|
|
1517 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1518 gcn_operand_part (DImode, operands[2], 0),
|
|
1519 operands[1],
|
|
1520 vcc));
|
|
1521 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1522 emit_insn (gen_vec_duplicatev64si
|
|
1523 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
|
|
1524 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
|
|
1525 DONE;
|
|
1526 }
|
|
1527 [(set_attr "type" "vmult")
|
|
1528 (set_attr "length" "8")])
|
|
1529
|
|
1530 (define_insn_and_split "addv64di3_sext_dup2_exec"
|
|
1531 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1532 (vec_merge:V64DI
|
|
1533 (plus:V64DI
|
|
1534 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
|
|
1535 " vA"))
|
|
1536 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
|
|
1537 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
|
|
1538 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
|
|
1539 (clobber (match_scratch:V64SI 5 "=&v"))
|
|
1540 (clobber (reg:DI VCC_REG))]
|
|
1541 ""
|
|
1542 "#"
|
|
1543 "gcn_can_split_p (V64DImode, operands[0])
|
|
1544 && gcn_can_split_p (V64DImode, operands[3])"
|
|
1545 [(const_int 0)]
|
|
1546 {
|
|
1547 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
|
|
1548 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
|
|
1549 gcn_gen_undef (V64SImode), operands[4]));
|
|
1550 emit_insn (gen_addv64si3_vcc_dup_exec
|
|
1551 (gcn_operand_part (V64DImode, operands[0], 0),
|
|
1552 gcn_operand_part (DImode, operands[2], 0),
|
|
1553 operands[1],
|
|
1554 vcc,
|
|
1555 gcn_operand_part (V64DImode, operands[3], 0),
|
|
1556 operands[4]));
|
|
1557 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1558 emit_insn (gen_vec_duplicatev64si_exec
|
|
1559 (dsthi, gcn_operand_part (DImode, operands[2], 1),
|
|
1560 gcn_gen_undef (V64SImode), operands[4]));
|
|
1561 emit_insn (gen_addcv64si3_exec
|
|
1562 (dsthi, dsthi, operands[5], vcc, vcc,
|
|
1563 gcn_operand_part (V64DImode, operands[3], 1),
|
|
1564 operands[4]));
|
|
1565 DONE;
|
|
1566 }
|
|
1567 [(set_attr "type" "vmult")
|
|
1568 (set_attr "length" "8")])
|
|
1569
|
|
1570 ;; }}}
|
|
1571 ;; {{{ DS memory ALU: add/sub
|
|
1572
|
|
1573 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
|
|
1574 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
|
|
1575
|
|
1576 ;; FIXME: the vector patterns probably need RD expanded to a vector of
|
|
1577 ;; addresses. For now, the only way a vector can get into LDS is
|
|
1578 ;; if the user puts it there manually.
|
|
1579 ;;
|
|
1580 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
|
|
1581 ;; checked to see if anything can ever use them.
|
|
1582
|
|
1583 (define_insn "add<mode>3_ds<exec>"
|
|
1584 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1585 (plus:DS_ARITH_MODE
|
|
1586 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
|
|
1587 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
|
|
1588 "rtx_equal_p (operands[0], operands[1])"
|
|
1589 "ds_add%u0\t%A0, %2%O0"
|
|
1590 [(set_attr "type" "ds")
|
|
1591 (set_attr "length" "8")])
|
|
1592
|
|
1593 (define_insn "add<mode>3_ds_scalar"
|
|
1594 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1595 (plus:DS_ARITH_SCALAR_MODE
|
|
1596 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
|
|
1597 "%RD")
|
|
1598 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
|
|
1599 "rtx_equal_p (operands[0], operands[1])"
|
|
1600 "ds_add%u0\t%A0, %2%O0"
|
|
1601 [(set_attr "type" "ds")
|
|
1602 (set_attr "length" "8")])
|
|
1603
|
|
1604 (define_insn "sub<mode>3_ds<exec>"
|
|
1605 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1606 (minus:DS_ARITH_MODE
|
|
1607 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
|
|
1608 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
|
|
1609 "rtx_equal_p (operands[0], operands[1])"
|
|
1610 "ds_sub%u0\t%A0, %2%O0"
|
|
1611 [(set_attr "type" "ds")
|
|
1612 (set_attr "length" "8")])
|
|
1613
|
|
1614 (define_insn "sub<mode>3_ds_scalar"
|
|
1615 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1616 (minus:DS_ARITH_SCALAR_MODE
|
|
1617 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
|
|
1618 " RD")
|
|
1619 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
|
|
1620 "rtx_equal_p (operands[0], operands[1])"
|
|
1621 "ds_sub%u0\t%A0, %2%O0"
|
|
1622 [(set_attr "type" "ds")
|
|
1623 (set_attr "length" "8")])
|
|
1624
|
|
1625 (define_insn "subr<mode>3_ds<exec>"
|
|
1626 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1627 (minus:DS_ARITH_MODE
|
|
1628 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
|
|
1629 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
|
|
1630 "rtx_equal_p (operands[0], operands[1])"
|
|
1631 "ds_rsub%u0\t%A0, %2%O0"
|
|
1632 [(set_attr "type" "ds")
|
|
1633 (set_attr "length" "8")])
|
|
1634
|
|
1635 (define_insn "subr<mode>3_ds_scalar"
|
|
1636 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
|
|
1637 (minus:DS_ARITH_SCALAR_MODE
|
|
1638 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
|
|
1639 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
|
|
1640 " RD")))]
|
|
1641 "rtx_equal_p (operands[0], operands[1])"
|
|
1642 "ds_rsub%u0\t%A0, %2%O0"
|
|
1643 [(set_attr "type" "ds")
|
|
1644 (set_attr "length" "8")])
|
|
1645
|
|
1646 ;; }}}
|
|
1647 ;; {{{ ALU special case: mult
|
|
1648
|
|
1649 (define_insn "<su>mulv64si3_highpart<exec>"
|
|
1650 [(set (match_operand:V64SI 0 "register_operand" "= v")
|
|
1651 (truncate:V64SI
|
|
1652 (lshiftrt:V64DI
|
|
1653 (mult:V64DI
|
|
1654 (any_extend:V64DI
|
|
1655 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
|
|
1656 (any_extend:V64DI
|
|
1657 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
|
|
1658 (const_int 32))))]
|
|
1659 ""
|
|
1660 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
|
|
1661 [(set_attr "type" "vop3a")
|
|
1662 (set_attr "length" "8")])
|
|
1663
|
|
1664 (define_insn "mul<mode>3<exec>"
|
|
1665 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
|
|
1666 (mult:VEC_ALL1REG_INT_MODE
|
|
1667 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
|
|
1668 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " vSvA")))]
|
|
1669 ""
|
|
1670 "v_mul_lo_u32\t%0, %1, %2"
|
|
1671 [(set_attr "type" "vop3a")
|
|
1672 (set_attr "length" "8")])
|
|
1673
|
|
1674 (define_insn "mul<mode>3_dup<exec>"
|
|
1675 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
|
|
1676 (mult:VEC_ALL1REG_INT_MODE
|
|
1677 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
|
|
1678 (vec_duplicate:VEC_ALL1REG_INT_MODE
|
|
1679 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
|
|
1680 ""
|
|
1681 "v_mul_lo_u32\t%0, %1, %2"
|
|
1682 [(set_attr "type" "vop3a")
|
|
1683 (set_attr "length" "8")])
|
|
1684
|
|
1685 (define_insn_and_split "mulv64di3"
|
|
1686 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1687 (mult:V64DI
|
|
1688 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
|
|
1689 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
|
|
1690 (clobber (match_scratch:V64SI 3 "=&v"))]
|
|
1691 ""
|
|
1692 "#"
|
|
1693 "reload_completed"
|
|
1694 [(const_int 0)]
|
|
1695 {
|
|
1696 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1697 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1698 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
|
|
1699 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
|
|
1700 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1701 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1702 rtx tmp = operands[3];
|
|
1703
|
|
1704 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
|
|
1705 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
|
|
1706 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
|
|
1707 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
|
|
1708 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
|
|
1709 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
|
|
1710 emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
|
|
1711 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
|
|
1712 DONE;
|
|
1713 })
|
|
1714
|
|
1715 (define_insn_and_split "mulv64di3_exec"
|
|
1716 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1717 (vec_merge:V64DI
|
|
1718 (mult:V64DI
|
|
1719 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
|
|
1720 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
|
|
1721 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
|
|
1722 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
|
|
1723 (clobber (match_scratch:V64SI 5 "=&v"))]
|
|
1724 ""
|
|
1725 "#"
|
|
1726 "reload_completed"
|
|
1727 [(const_int 0)]
|
|
1728 {
|
|
1729 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1730 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1731 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
|
|
1732 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
|
|
1733 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1734 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1735 rtx exec = operands[4];
|
|
1736 rtx tmp = operands[5];
|
|
1737
|
|
1738 rtx old_lo, old_hi;
|
|
1739 if (GET_CODE (operands[3]) == UNSPEC)
|
|
1740 {
|
|
1741 old_lo = old_hi = gcn_gen_undef (V64SImode);
|
|
1742 }
|
|
1743 else
|
|
1744 {
|
|
1745 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
|
|
1746 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
|
|
1747 }
|
|
1748
|
|
1749 rtx undef = gcn_gen_undef (V64SImode);
|
|
1750
|
|
1751 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
|
|
1752 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
|
|
1753 old_hi, exec));
|
|
1754 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
|
|
1755 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
|
|
1756 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
|
|
1757 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
|
|
1758 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
|
|
1759 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
|
|
1760 DONE;
|
|
1761 })
|
|
1762
|
|
1763 (define_insn_and_split "mulv64di3_zext"
|
|
1764 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1765 (mult:V64DI
|
|
1766 (zero_extend:V64DI
|
|
1767 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
|
|
1768 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
|
|
1769 (clobber (match_scratch:V64SI 3 "=&v"))]
|
|
1770 ""
|
|
1771 "#"
|
|
1772 "reload_completed"
|
|
1773 [(const_int 0)]
|
|
1774 {
|
|
1775 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1776 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1777 rtx left = operands[1];
|
|
1778 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1779 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1780 rtx tmp = operands[3];
|
|
1781
|
|
1782 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
|
|
1783 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
|
|
1784 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
|
|
1785 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
|
|
1786 DONE;
|
|
1787 })
|
|
1788
|
|
1789 (define_insn_and_split "mulv64di3_zext_exec"
|
|
1790 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
1791 (vec_merge:V64DI
|
|
1792 (mult:V64DI
|
|
1793 (zero_extend:V64DI
|
|
1794 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
|
|
1795 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
|
|
1796 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
|
|
1797 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
|
|
1798 (clobber (match_scratch:V64SI 5 "=&v"))]
|
|
1799 ""
|
|
1800 "#"
|
|
1801 "reload_completed"
|
|
1802 [(const_int 0)]
|
|
1803 {
|
|
1804 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1805 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1806 rtx left = operands[1];
|
|
1807 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1808 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1809 rtx exec = operands[4];
|
|
1810 rtx tmp = operands[5];
|
|
1811
|
|
1812 rtx old_lo, old_hi;
|
|
1813 if (GET_CODE (operands[3]) == UNSPEC)
|
|
1814 {
|
|
1815 old_lo = old_hi = gcn_gen_undef (V64SImode);
|
|
1816 }
|
|
1817 else
|
|
1818 {
|
|
1819 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
|
|
1820 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
|
|
1821 }
|
|
1822
|
|
1823 rtx undef = gcn_gen_undef (V64SImode);
|
|
1824
|
|
1825 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
|
|
1826 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
|
|
1827 old_hi, exec));
|
|
1828 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
|
|
1829 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
|
|
1830 DONE;
|
|
1831 })
|
|
1832
|
|
1833 (define_insn_and_split "mulv64di3_zext_dup2"
|
|
1834 [(set (match_operand:V64DI 0 "register_operand" "= &v")
|
|
1835 (mult:V64DI
|
|
1836 (zero_extend:V64DI
|
|
1837 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
|
|
1838 (vec_duplicate:V64DI
|
|
1839 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
|
|
1840 (clobber (match_scratch:V64SI 3 "= &v"))]
|
|
1841 ""
|
|
1842 "#"
|
|
1843 "reload_completed"
|
|
1844 [(const_int 0)]
|
|
1845 {
|
|
1846 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1847 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1848 rtx left = operands[1];
|
|
1849 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1850 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1851 rtx tmp = operands[3];
|
|
1852
|
|
1853 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
|
|
1854 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
|
|
1855 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
|
|
1856 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
|
|
1857 DONE;
|
|
1858 })
|
|
1859
|
|
1860 (define_insn_and_split "mulv64di3_zext_dup2_exec"
|
|
1861 [(set (match_operand:V64DI 0 "register_operand" "= &v")
|
|
1862 (vec_merge:V64DI
|
|
1863 (mult:V64DI
|
|
1864 (zero_extend:V64DI
|
|
1865 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
|
|
1866 (vec_duplicate:V64DI
|
|
1867 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
|
|
1868 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
|
|
1869 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
|
|
1870 (clobber (match_scratch:V64SI 5 "= &v"))]
|
|
1871 ""
|
|
1872 "#"
|
|
1873 "reload_completed"
|
|
1874 [(const_int 0)]
|
|
1875 {
|
|
1876 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1877 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1878 rtx left = operands[1];
|
|
1879 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1880 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1881 rtx exec = operands[4];
|
|
1882 rtx tmp = operands[5];
|
|
1883
|
|
1884 rtx old_lo, old_hi;
|
|
1885 if (GET_CODE (operands[3]) == UNSPEC)
|
|
1886 {
|
|
1887 old_lo = old_hi = gcn_gen_undef (V64SImode);
|
|
1888 }
|
|
1889 else
|
|
1890 {
|
|
1891 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
|
|
1892 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
|
|
1893 }
|
|
1894
|
|
1895 rtx undef = gcn_gen_undef (V64SImode);
|
|
1896
|
|
1897 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
|
|
1898 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
|
|
1899 old_hi, exec));
|
|
1900 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
|
|
1901 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
|
|
1902 DONE;
|
|
1903 })
|
|
1904
|
|
1905 ;; }}}
|
|
1906 ;; {{{ ALU generic case
|
|
1907
|
|
1908 (define_mode_iterator VEC_INT_MODE [V64SI V64DI])
|
|
1909
|
|
1910 (define_code_iterator bitop [and ior xor])
|
|
1911 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
|
|
1912 (define_code_iterator minmaxop [smin smax umin umax])
|
|
1913
|
|
1914 (define_insn "<expander><mode>2<exec>"
|
|
1915 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
|
|
1916 (bitunop:VEC_1REG_INT_MODE
|
|
1917 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
|
|
1918 ""
|
|
1919 "v_<mnemonic>0\t%0, %1"
|
|
1920 [(set_attr "type" "vop1")
|
|
1921 (set_attr "length" "8")])
|
|
1922
|
|
1923 (define_insn "<expander><mode>3<exec>"
|
|
1924 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
|
|
1925 (bitop:VEC_1REG_INT_MODE
|
|
1926 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
|
|
1927 "% v, 0")
|
|
1928 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
|
|
1929 "vSvB, v")))]
|
|
1930 ""
|
|
1931 "@
|
|
1932 v_<mnemonic>0\t%0, %2, %1
|
|
1933 ds_<mnemonic>0\t%A0, %2%O0"
|
|
1934 [(set_attr "type" "vop2,ds")
|
|
1935 (set_attr "length" "8,8")])
|
|
1936
|
|
1937 (define_insn_and_split "<expander>v64di3"
|
|
1938 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
|
|
1939 (bitop:V64DI
|
|
1940 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
|
|
1941 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
|
|
1942 ""
|
|
1943 "@
|
|
1944 #
|
|
1945 ds_<mnemonic>0\t%A0, %2%O0"
|
|
1946 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
|
|
1947 [(set (match_dup 3)
|
|
1948 (bitop:V64SI (match_dup 5) (match_dup 7)))
|
|
1949 (set (match_dup 4)
|
|
1950 (bitop:V64SI (match_dup 6) (match_dup 8)))]
|
|
1951 {
|
|
1952 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1953 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1954 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
|
|
1955 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
|
|
1956 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1957 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1958 }
|
|
1959 [(set_attr "type" "vmult,ds")
|
|
1960 (set_attr "length" "16,8")])
|
|
1961
|
|
1962 (define_insn_and_split "<expander>v64di3_exec"
|
|
1963 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
|
|
1964 (vec_merge:V64DI
|
|
1965 (bitop:V64DI
|
|
1966 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
|
|
1967 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
|
|
1968 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
|
|
1969 " U0,U0")
|
|
1970 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
|
|
1971 "!memory_operand (operands[0], VOIDmode)
|
|
1972 || (rtx_equal_p (operands[0], operands[1])
|
|
1973 && register_operand (operands[2], VOIDmode))"
|
|
1974 "@
|
|
1975 #
|
|
1976 ds_<mnemonic>0\t%A0, %2%O0"
|
|
1977 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
|
|
1978 [(set (match_dup 5)
|
|
1979 (vec_merge:V64SI
|
|
1980 (bitop:V64SI (match_dup 7) (match_dup 9))
|
|
1981 (match_dup 11)
|
|
1982 (match_dup 4)))
|
|
1983 (set (match_dup 6)
|
|
1984 (vec_merge:V64SI
|
|
1985 (bitop:V64SI (match_dup 8) (match_dup 10))
|
|
1986 (match_dup 12)
|
|
1987 (match_dup 4)))]
|
|
1988 {
|
|
1989 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
|
|
1990 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
|
|
1991 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
|
|
1992 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
|
|
1993 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
|
|
1994 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
|
|
1995 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
|
|
1996 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
|
|
1997 }
|
|
1998 [(set_attr "type" "vmult,ds")
|
|
1999 (set_attr "length" "16,8")])
|
|
2000
|
|
2001 (define_insn "<expander>v64si3<exec>"
|
|
2002 [(set (match_operand:V64SI 0 "register_operand" "= v")
|
|
2003 (shiftop:V64SI
|
|
2004 (match_operand:V64SI 1 "gcn_alu_operand" " v")
|
|
2005 (vec_duplicate:V64SI
|
|
2006 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
|
|
2007 ""
|
|
2008 "v_<revmnemonic>0\t%0, %2, %1"
|
|
2009 [(set_attr "type" "vop2")
|
|
2010 (set_attr "length" "8")])
|
|
2011
|
|
2012 (define_insn "v<expander>v64si3<exec>"
|
|
2013 [(set (match_operand:V64SI 0 "register_operand" "=v")
|
|
2014 (shiftop:V64SI
|
|
2015 (match_operand:V64SI 1 "gcn_alu_operand" " v")
|
|
2016 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
|
|
2017 ""
|
|
2018 "v_<revmnemonic>0\t%0, %2, %1"
|
|
2019 [(set_attr "type" "vop2")
|
|
2020 (set_attr "length" "8")])
|
|
2021
|
|
2022 (define_insn "<expander><mode>3<exec>"
|
|
2023 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
|
|
2024 (minmaxop:VEC_1REG_INT_MODE
|
|
2025 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
|
|
2026 "% v, 0")
|
|
2027 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
|
|
2028 "vSvB, v")))]
|
|
2029 ""
|
|
2030 "@
|
|
2031 v_<mnemonic>0\t%0, %2, %1
|
|
2032 ds_<mnemonic>0\t%A0, %2%O0"
|
|
2033 [(set_attr "type" "vop2,ds")
|
|
2034 (set_attr "length" "8,8")])
|
|
2035
|
|
2036 ;; }}}
|
|
2037 ;; {{{ FP binops - special cases
|
|
2038
|
|
2039 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
|
|
2040 ; adding the negated second operand to the first.
|
|
2041
|
|
2042 (define_insn "subv64df3<exec>"
|
|
2043 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
|
|
2044 (minus:V64DF
|
|
2045 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
|
|
2046 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
|
|
2047 ""
|
|
2048 "@
|
|
2049 v_add_f64\t%0, %1, -%2
|
|
2050 v_add_f64\t%0, -%2, %1"
|
|
2051 [(set_attr "type" "vop3a")
|
|
2052 (set_attr "length" "8,8")])
|
|
2053
|
|
2054 (define_insn "subdf"
|
|
2055 [(set (match_operand:DF 0 "register_operand" "= v, v")
|
|
2056 (minus:DF
|
|
2057 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
|
|
2058 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
|
|
2059 ""
|
|
2060 "@
|
|
2061 v_add_f64\t%0, %1, -%2
|
|
2062 v_add_f64\t%0, -%2, %1"
|
|
2063 [(set_attr "type" "vop3a")
|
|
2064 (set_attr "length" "8,8")])
|
|
2065
|
|
2066 ;; }}}
|
|
2067 ;; {{{ FP binops - generic
|
|
2068
|
|
2069 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
|
|
2070 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
|
|
2071 (define_mode_iterator FP_MODE [HF SF DF])
|
|
2072 (define_mode_iterator FP_1REG_MODE [HF SF])
|
|
2073
|
|
2074 (define_code_iterator comm_fp [plus mult smin smax])
|
|
2075 (define_code_iterator nocomm_fp [minus])
|
|
2076 (define_code_iterator all_fp [plus mult minus smin smax])
|
|
2077
|
|
2078 (define_insn "<expander><mode>3<exec>"
|
|
2079 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
|
|
2080 (comm_fp:VEC_FP_MODE
|
|
2081 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
|
|
2082 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
|
|
2083 ""
|
|
2084 "v_<mnemonic>0\t%0, %2, %1"
|
|
2085 [(set_attr "type" "vop2")
|
|
2086 (set_attr "length" "8")])
|
|
2087
|
|
2088 (define_insn "<expander><mode>3"
|
|
2089 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
|
|
2090 (comm_fp:FP_MODE
|
|
2091 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
|
|
2092 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
|
|
2093 ""
|
|
2094 "@
|
|
2095 v_<mnemonic>0\t%0, %2, %1
|
|
2096 v_<mnemonic>0\t%0, %1%O0"
|
|
2097 [(set_attr "type" "vop2,ds")
|
|
2098 (set_attr "length" "8")])
|
|
2099
|
|
2100 (define_insn "<expander><mode>3<exec>"
|
|
2101 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
|
|
2102 (nocomm_fp:VEC_FP_1REG_MODE
|
|
2103 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
|
|
2104 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
|
|
2105 ""
|
|
2106 "@
|
|
2107 v_<mnemonic>0\t%0, %1, %2
|
|
2108 v_<revmnemonic>0\t%0, %2, %1"
|
|
2109 [(set_attr "type" "vop2")
|
|
2110 (set_attr "length" "8,8")])
|
|
2111
|
|
2112 (define_insn "<expander><mode>3"
|
|
2113 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
|
|
2114 (nocomm_fp:FP_1REG_MODE
|
|
2115 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
|
|
2116 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
|
|
2117 ""
|
|
2118 "@
|
|
2119 v_<mnemonic>0\t%0, %1, %2
|
|
2120 v_<revmnemonic>0\t%0, %2, %1"
|
|
2121 [(set_attr "type" "vop2")
|
|
2122 (set_attr "length" "8,8")])
|
|
2123
|
|
2124 ;; }}}
|
|
2125 ;; {{{ FP unops
|
|
2126
|
|
2127 (define_insn "abs<mode>2"
|
|
2128 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
|
|
2129 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
|
|
2130 ""
|
|
2131 "v_add%i0\t%0, 0, |%1|"
|
|
2132 [(set_attr "type" "vop3a")
|
|
2133 (set_attr "length" "8")])
|
|
2134
|
|
2135 (define_insn "abs<mode>2<exec>"
|
|
2136 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
|
|
2137 (abs:VEC_FP_MODE
|
|
2138 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
|
|
2139 ""
|
|
2140 "v_add%i0\t%0, 0, |%1|"
|
|
2141 [(set_attr "type" "vop3a")
|
|
2142 (set_attr "length" "8")])
|
|
2143
|
|
2144 (define_insn "neg<mode>2<exec>"
|
|
2145 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
|
|
2146 (neg:VEC_FP_MODE
|
|
2147 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
|
|
2148 ""
|
|
2149 "v_add%i0\t%0, 0, -%1"
|
|
2150 [(set_attr "type" "vop3a")
|
|
2151 (set_attr "length" "8")])
|
|
2152
|
|
2153 (define_insn "sqrt<mode>2<exec>"
|
|
2154 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
|
|
2155 (sqrt:VEC_FP_MODE
|
|
2156 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2157 "flag_unsafe_math_optimizations"
|
|
2158 "v_sqrt%i0\t%0, %1"
|
|
2159 [(set_attr "type" "vop1")
|
|
2160 (set_attr "length" "8")])
|
|
2161
|
|
2162 (define_insn "sqrt<mode>2"
|
|
2163 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
|
|
2164 (sqrt:FP_MODE
|
|
2165 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2166 "flag_unsafe_math_optimizations"
|
|
2167 "v_sqrt%i0\t%0, %1"
|
|
2168 [(set_attr "type" "vop1")
|
|
2169 (set_attr "length" "8")])
|
|
2170
|
|
2171 ;; }}}
|
|
2172 ;; {{{ FP fused multiply and add
|
|
2173
|
|
2174 (define_insn "fma<mode>4<exec>"
|
|
2175 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
|
|
2176 (fma:VEC_FP_MODE
|
|
2177 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
|
|
2178 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
|
|
2179 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
|
|
2180 ""
|
|
2181 "v_fma%i0\t%0, %1, %2, %3"
|
|
2182 [(set_attr "type" "vop3a")
|
|
2183 (set_attr "length" "8")])
|
|
2184
|
|
2185 (define_insn "fma<mode>4_negop2<exec>"
|
|
2186 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
|
|
2187 (fma:VEC_FP_MODE
|
|
2188 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
|
|
2189 (neg:VEC_FP_MODE
|
|
2190 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
|
|
2191 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
|
|
2192 ""
|
|
2193 "v_fma%i0\t%0, %1, -%2, %3"
|
|
2194 [(set_attr "type" "vop3a")
|
|
2195 (set_attr "length" "8")])
|
|
2196
|
|
2197 (define_insn "fma<mode>4"
|
|
2198 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
|
|
2199 (fma:FP_MODE
|
|
2200 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
|
|
2201 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
|
|
2202 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
|
|
2203 ""
|
|
2204 "v_fma%i0\t%0, %1, %2, %3"
|
|
2205 [(set_attr "type" "vop3a")
|
|
2206 (set_attr "length" "8")])
|
|
2207
|
|
2208 (define_insn "fma<mode>4_negop2"
|
|
2209 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
|
|
2210 (fma:FP_MODE
|
|
2211 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
|
|
2212 (neg:FP_MODE
|
|
2213 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
|
|
2214 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
|
|
2215 ""
|
|
2216 "v_fma%i0\t%0, %1, -%2, %3"
|
|
2217 [(set_attr "type" "vop3a")
|
|
2218 (set_attr "length" "8")])
|
|
2219
|
|
2220 ;; }}}
|
|
2221 ;; {{{ FP division
|
|
2222
|
|
2223 (define_insn "recip<mode>2<exec>"
|
|
2224 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
|
|
2225 (div:VEC_FP_MODE
|
|
2226 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
|
|
2227 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2228 ""
|
|
2229 "v_rcp%i0\t%0, %1"
|
|
2230 [(set_attr "type" "vop1")
|
|
2231 (set_attr "length" "8")])
|
|
2232
|
|
2233 (define_insn "recip<mode>2"
|
|
2234 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
|
|
2235 (div:FP_MODE
|
|
2236 (float:FP_MODE (const_int 1))
|
|
2237 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2238 ""
|
|
2239 "v_rcp%i0\t%0, %1"
|
|
2240 [(set_attr "type" "vop1")
|
|
2241 (set_attr "length" "8")])
|
|
2242
|
|
2243 ;; Do division via a = b * 1/c
|
|
2244 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
|
|
2245 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
|
|
2246 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
|
|
2247 ;;
|
|
2248 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
|
|
2249
|
|
2250 (define_expand "div<mode>3"
|
|
2251 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
|
|
2252 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
|
|
2253 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
|
|
2254 "flag_reciprocal_math"
|
|
2255 {
|
|
2256 rtx two = gcn_vec_constant (<MODE>mode,
|
|
2257 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
|
|
2258 rtx initrcp = gen_reg_rtx (<MODE>mode);
|
|
2259 rtx fma = gen_reg_rtx (<MODE>mode);
|
|
2260 rtx rcp;
|
|
2261
|
|
2262 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
|
|
2263 && real_identical
|
|
2264 (CONST_DOUBLE_REAL_VALUE
|
|
2265 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
|
|
2266
|
|
2267 if (is_rcp)
|
|
2268 rcp = operands[0];
|
|
2269 else
|
|
2270 rcp = gen_reg_rtx (<MODE>mode);
|
|
2271
|
|
2272 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
|
|
2273 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
|
|
2274 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
|
|
2275
|
|
2276 if (!is_rcp)
|
|
2277 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
|
|
2278
|
|
2279 DONE;
|
|
2280 })
|
|
2281
|
|
2282 (define_expand "div<mode>3"
|
|
2283 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
|
|
2284 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
|
|
2285 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
|
|
2286 "flag_reciprocal_math"
|
|
2287 {
|
|
2288 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
|
|
2289 rtx initrcp = gen_reg_rtx (<MODE>mode);
|
|
2290 rtx fma = gen_reg_rtx (<MODE>mode);
|
|
2291 rtx rcp;
|
|
2292
|
|
2293 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
|
|
2294 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
|
|
2295 &dconstm1));
|
|
2296
|
|
2297 if (is_rcp)
|
|
2298 rcp = operands[0];
|
|
2299 else
|
|
2300 rcp = gen_reg_rtx (<MODE>mode);
|
|
2301
|
|
2302 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
|
|
2303 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
|
|
2304 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
|
|
2305
|
|
2306 if (!is_rcp)
|
|
2307 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
|
|
2308
|
|
2309 DONE;
|
|
2310 })
|
|
2311
|
|
2312 ;; }}}
|
|
2313 ;; {{{ Int/FP conversions
|
|
2314
|
|
2315 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
|
|
2316 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
|
|
2317
|
|
2318 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
|
|
2319 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
|
|
2320 (define_mode_iterator VCVT_IMODE [V64HI V64SI])
|
|
2321
|
|
2322 (define_code_iterator cvt_op [fix unsigned_fix
|
|
2323 float unsigned_float
|
|
2324 float_extend float_truncate])
|
|
2325 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
|
|
2326 (float "float") (unsigned_float "floatuns")
|
|
2327 (float_extend "extend") (float_truncate "trunc")])
|
|
2328 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
|
|
2329 (float "%i0%i1") (unsigned_float "%i0%u1")
|
|
2330 (float_extend "%i0%i1")
|
|
2331 (float_truncate "%i0%i1")])
|
|
2332
|
|
2333 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
|
|
2334 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
|
|
2335 (cvt_op:CVT_TO_MODE
|
|
2336 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2337 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
|
|
2338 <cvt_name>_cvt)"
|
|
2339 "v_cvt<cvt_operands>\t%0, %1"
|
|
2340 [(set_attr "type" "vop1")
|
|
2341 (set_attr "length" "8")])
|
|
2342
|
|
2343 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
|
|
2344 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
|
|
2345 (cvt_op:VCVT_FMODE
|
|
2346 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2347 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
|
|
2348 <cvt_name>_cvt)"
|
|
2349 "v_cvt<cvt_operands>\t%0, %1"
|
|
2350 [(set_attr "type" "vop1")
|
|
2351 (set_attr "length" "8")])
|
|
2352
|
|
2353 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
|
|
2354 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
|
|
2355 (cvt_op:VCVT_IMODE
|
|
2356 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
|
|
2357 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
|
|
2358 <cvt_name>_cvt)"
|
|
2359 "v_cvt<cvt_operands>\t%0, %1"
|
|
2360 [(set_attr "type" "vop1")
|
|
2361 (set_attr "length" "8")])
|
|
2362
|
|
2363 ;; }}}
|
|
2364 ;; {{{ Int/int conversions
|
|
2365
|
|
2366 (define_code_iterator zero_convert [truncate zero_extend])
|
|
2367 (define_code_attr convop [
|
|
2368 (sign_extend "extend")
|
|
2369 (zero_extend "zero_extend")
|
|
2370 (truncate "trunc")])
|
|
2371
|
|
2372 (define_insn "<convop><VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
|
|
2373 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
|
|
2374 (zero_convert:VEC_ALL1REG_INT_MODE
|
|
2375 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
|
|
2376 ""
|
|
2377 "v_mov_b32_sdwa\t%0, %1 dst_sel:<VEC_ALL1REG_INT_MODE:sdwa> dst_unused:UNUSED_PAD src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
|
|
2378 [(set_attr "type" "vop_sdwa")
|
|
2379 (set_attr "length" "8")])
|
|
2380
|
|
2381 (define_insn "extend<VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
|
|
2382 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
|
|
2383 (sign_extend:VEC_ALL1REG_INT_MODE
|
|
2384 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
|
|
2385 ""
|
|
2386 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
|
|
2387 [(set_attr "type" "vop_sdwa")
|
|
2388 (set_attr "length" "8")])
|
|
2389
|
|
2390 ;; GCC can already do these for scalar types, but not for vector types.
|
|
2391 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
|
|
2392 ;; so there must be a few tricks here.
|
|
2393
|
|
2394 (define_insn_and_split "truncv64di<mode>2"
|
|
2395 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
|
|
2396 (truncate:VEC_ALL1REG_INT_MODE
|
|
2397 (match_operand:V64DI 1 "gcn_alu_operand" " v")))]
|
|
2398 ""
|
|
2399 "#"
|
|
2400 "reload_completed"
|
|
2401 [(const_int 0)]
|
|
2402 {
|
|
2403 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
|
|
2404 rtx out = operands[0];
|
|
2405
|
|
2406 if (<MODE>mode != V64SImode)
|
|
2407 emit_insn (gen_truncv64si<mode>2 (out, inlo));
|
|
2408 else
|
|
2409 emit_move_insn (out, inlo);
|
|
2410 }
|
|
2411 [(set_attr "type" "vop2")
|
|
2412 (set_attr "length" "4")])
|
|
2413
|
|
2414 (define_insn_and_split "truncv64di<mode>2_exec"
|
|
2415 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
|
|
2416 (vec_merge:VEC_ALL1REG_INT_MODE
|
|
2417 (truncate:VEC_ALL1REG_INT_MODE
|
|
2418 (match_operand:V64DI 1 "gcn_alu_operand" " v"))
|
|
2419 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_or_unspec_operand"
|
|
2420 "U0")
|
|
2421 (match_operand:DI 3 "gcn_exec_operand" " e")))]
|
|
2422 ""
|
|
2423 "#"
|
|
2424 "reload_completed"
|
|
2425 [(const_int 0)]
|
|
2426 {
|
|
2427 rtx out = operands[0];
|
|
2428 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
|
|
2429 rtx merge = operands[2];
|
|
2430 rtx exec = operands[3];
|
|
2431
|
|
2432 if (<MODE>mode != V64SImode)
|
|
2433 emit_insn (gen_truncv64si<mode>2_exec (out, inlo, merge, exec));
|
|
2434 else
|
|
2435 emit_insn (gen_mov<mode>_exec (out, inlo, exec, merge));
|
|
2436 }
|
|
2437 [(set_attr "type" "vop2")
|
|
2438 (set_attr "length" "4")])
|
|
2439
|
|
2440 (define_insn_and_split "<convop><mode>v64di2"
|
|
2441 [(set (match_operand:V64DI 0 "register_operand" "=v")
|
|
2442 (any_extend:V64DI
|
|
2443 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v")))]
|
|
2444 ""
|
|
2445 "#"
|
|
2446 "reload_completed"
|
|
2447 [(const_int 0)]
|
|
2448 {
|
|
2449 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
2450 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
2451 rtx in = operands[1];
|
|
2452
|
|
2453 if (<MODE>mode != V64SImode)
|
|
2454 emit_insn (gen_<convop><mode>v64si2 (outlo, in));
|
|
2455 else
|
|
2456 emit_move_insn (outlo, in);
|
|
2457 if ('<su>' == 's')
|
|
2458 emit_insn (gen_ashrv64si3 (outhi, outlo, GEN_INT (31)));
|
|
2459 else
|
|
2460 emit_insn (gen_vec_duplicatev64si (outhi, const0_rtx));
|
|
2461 }
|
|
2462 [(set_attr "type" "mult")
|
|
2463 (set_attr "length" "12")])
|
|
2464
|
|
2465 (define_insn_and_split "<convop><mode>v64di2_exec"
|
|
2466 [(set (match_operand:V64DI 0 "register_operand" "=v")
|
|
2467 (vec_merge:V64DI
|
|
2468 (any_extend:V64DI
|
|
2469 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v"))
|
|
2470 (match_operand:V64DI 2 "gcn_alu_or_unspec_operand" "U0")
|
|
2471 (match_operand:DI 3 "gcn_exec_operand" " e")))]
|
|
2472 ""
|
|
2473 "#"
|
|
2474 "reload_completed"
|
|
2475 [(const_int 0)]
|
|
2476 {
|
|
2477 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
|
|
2478 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
|
|
2479 rtx in = operands[1];
|
|
2480 rtx mergelo = gcn_operand_part (V64DImode, operands[2], 0);
|
|
2481 rtx mergehi = gcn_operand_part (V64DImode, operands[2], 1);
|
|
2482 rtx exec = operands[3];
|
|
2483
|
|
2484 if (<MODE>mode != V64SImode)
|
|
2485 emit_insn (gen_<convop><mode>v64si2_exec (outlo, in, mergelo, exec));
|
|
2486 else
|
|
2487 emit_insn (gen_mov<mode>_exec (outlo, in, exec, mergelo));
|
|
2488 if ('<su>' == 's')
|
|
2489 emit_insn (gen_ashrv64si3_exec (outhi, outlo, GEN_INT (31), mergehi,
|
|
2490 exec));
|
|
2491 else
|
|
2492 emit_insn (gen_vec_duplicatev64si_exec (outhi, const0_rtx, mergehi,
|
|
2493 exec));
|
|
2494 }
|
|
2495 [(set_attr "type" "mult")
|
|
2496 (set_attr "length" "12")])
|
|
2497
|
|
2498 ;; }}}
|
|
2499 ;; {{{ Vector comparison/merge
|
|
2500
|
|
2501 (define_mode_iterator VCMP_MODE [V64HI V64SI V64DI V64HF V64SF V64DF])
|
|
2502 (define_mode_iterator VCMP_MODE_INT [V64HI V64SI V64DI])
|
|
2503
|
|
2504 (define_insn "vec_cmp<mode>di"
|
|
2505 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
|
|
2506 (match_operator 1 "gcn_fp_compare_operator"
|
|
2507 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
|
|
2508 "vSv, B,vSv, B, v,vA")
|
|
2509 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
|
|
2510 " v, v, v, v,vA, v")]))
|
|
2511 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
|
|
2512 ""
|
|
2513 "@
|
|
2514 v_cmp%E1\tvcc, %2, %3
|
|
2515 v_cmp%E1\tvcc, %2, %3
|
|
2516 v_cmpx%E1\tvcc, %2, %3
|
|
2517 v_cmpx%E1\tvcc, %2, %3
|
|
2518 v_cmp%E1\t%0, %2, %3
|
|
2519 v_cmp%E1\t%0, %2, %3"
|
|
2520 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
|
|
2521 (set_attr "length" "4,8,4,8,8,8")])
|
|
2522
|
|
2523 (define_expand "vec_cmpu<mode>di"
|
|
2524 [(match_operand:DI 0 "register_operand")
|
|
2525 (match_operator 1 "gcn_compare_operator"
|
|
2526 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
|
|
2527 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])]
|
|
2528 ""
|
|
2529 {
|
|
2530 /* Unsigned comparisons use the same patterns as signed comparisons,
|
|
2531 except that they use unsigned operators (e.g. LTU vs LT).
|
|
2532 The '%E1' directive then does the Right Thing. */
|
|
2533 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
|
|
2534 operands[3]));
|
|
2535 DONE;
|
|
2536 })
|
|
2537
|
|
2538 ; There's no instruction for 8-bit vector comparison, so we need to extend.
|
|
2539 (define_expand "vec_cmp<u>v64qidi"
|
|
2540 [(match_operand:DI 0 "register_operand")
|
|
2541 (match_operator 1 "gcn_compare_operator"
|
|
2542 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
|
|
2543 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])]
|
|
2544 "can_create_pseudo_p ()"
|
|
2545 {
|
|
2546 rtx sitmp1 = gen_reg_rtx (V64SImode);
|
|
2547 rtx sitmp2 = gen_reg_rtx (V64SImode);
|
|
2548
|
|
2549 emit_insn (gen_<expander>v64qiv64si2 (sitmp1, operands[2]));
|
|
2550 emit_insn (gen_<expander>v64qiv64si2 (sitmp2, operands[3]));
|
|
2551 emit_insn (gen_vec_cmpv64sidi (operands[0], operands[1], sitmp1, sitmp2));
|
|
2552 DONE;
|
|
2553 })
|
|
2554
|
|
2555 (define_insn "vec_cmp<mode>di_exec"
|
|
2556 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
|
|
2557 (and:DI
|
|
2558 (match_operator 1 "gcn_fp_compare_operator"
|
|
2559 [(match_operand:VCMP_MODE 2 "gcn_alu_operand"
|
|
2560 "vSv, B,vSv, B, v,vA")
|
|
2561 (match_operand:VCMP_MODE 3 "gcn_vop3_operand"
|
|
2562 " v, v, v, v,vA, v")])
|
|
2563 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
|
|
2564 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
|
|
2565 ""
|
|
2566 "@
|
|
2567 v_cmp%E1\tvcc, %2, %3
|
|
2568 v_cmp%E1\tvcc, %2, %3
|
|
2569 v_cmpx%E1\tvcc, %2, %3
|
|
2570 v_cmpx%E1\tvcc, %2, %3
|
|
2571 v_cmp%E1\t%0, %2, %3
|
|
2572 v_cmp%E1\t%0, %2, %3"
|
|
2573 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
|
|
2574 (set_attr "length" "4,8,4,8,8,8")])
|
|
2575
|
|
2576 (define_expand "vec_cmpu<mode>di_exec"
|
|
2577 [(match_operand:DI 0 "register_operand")
|
|
2578 (match_operator 1 "gcn_compare_operator"
|
|
2579 [(match_operand:VCMP_MODE_INT 2 "gcn_alu_operand")
|
|
2580 (match_operand:VCMP_MODE_INT 3 "gcn_vop3_operand")])
|
|
2581 (match_operand:DI 4 "gcn_exec_reg_operand")]
|
|
2582 ""
|
|
2583 {
|
|
2584 /* Unsigned comparisons use the same patterns as signed comparisons,
|
|
2585 except that they use unsigned operators (e.g. LTU vs LT).
|
|
2586 The '%E1' directive then does the Right Thing. */
|
|
2587 emit_insn (gen_vec_cmpu<mode>di_exec (operands[0], operands[1],
|
|
2588 operands[2], operands[3],
|
|
2589 operands[4]));
|
|
2590 DONE;
|
|
2591 })
|
|
2592
|
|
2593 (define_expand "vec_cmp<u>v64qidi_exec"
|
|
2594 [(match_operand:DI 0 "register_operand")
|
|
2595 (match_operator 1 "gcn_compare_operator"
|
|
2596 [(any_extend:V64SI (match_operand:V64QI 2 "gcn_alu_operand"))
|
|
2597 (any_extend:V64SI (match_operand:V64QI 3 "gcn_vop3_operand"))])
|
|
2598 (match_operand:DI 4 "gcn_exec_reg_operand")]
|
|
2599 "can_create_pseudo_p ()"
|
|
2600 {
|
|
2601 rtx sitmp1 = gen_reg_rtx (V64SImode);
|
|
2602 rtx sitmp2 = gen_reg_rtx (V64SImode);
|
|
2603
|
|
2604 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp1, operands[2],
|
|
2605 operands[2], operands[4]));
|
|
2606 emit_insn (gen_<expander>v64qiv64si2_exec (sitmp2, operands[3],
|
|
2607 operands[3], operands[4]));
|
|
2608 emit_insn (gen_vec_cmpv64sidi_exec (operands[0], operands[1], sitmp1,
|
|
2609 sitmp2, operands[4]));
|
|
2610 DONE;
|
|
2611 })
|
|
2612
|
|
2613 (define_insn "vec_cmp<mode>di_dup"
|
|
2614 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
|
|
2615 (match_operator 1 "gcn_fp_compare_operator"
|
|
2616 [(vec_duplicate:VCMP_MODE
|
|
2617 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
|
|
2618 " Sv, B,Sv,B, A"))
|
|
2619 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")]))
|
|
2620 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
|
|
2621 ""
|
|
2622 "@
|
|
2623 v_cmp%E1\tvcc, %2, %3
|
|
2624 v_cmp%E1\tvcc, %2, %3
|
|
2625 v_cmpx%E1\tvcc, %2, %3
|
|
2626 v_cmpx%E1\tvcc, %2, %3
|
|
2627 v_cmp%E1\t%0, %2, %3"
|
|
2628 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
|
|
2629 (set_attr "length" "4,8,4,8,8")])
|
|
2630
|
|
2631 (define_insn "vec_cmp<mode>di_dup_exec"
|
|
2632 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
|
|
2633 (and:DI
|
|
2634 (match_operator 1 "gcn_fp_compare_operator"
|
|
2635 [(vec_duplicate:VCMP_MODE
|
|
2636 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
|
|
2637 " Sv, B,Sv,B, A"))
|
|
2638 (match_operand:VCMP_MODE 3 "gcn_vop3_operand" " v, v, v,v, v")])
|
|
2639 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
|
|
2640 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
|
|
2641 ""
|
|
2642 "@
|
|
2643 v_cmp%E1\tvcc, %2, %3
|
|
2644 v_cmp%E1\tvcc, %2, %3
|
|
2645 v_cmpx%E1\tvcc, %2, %3
|
|
2646 v_cmpx%E1\tvcc, %2, %3
|
|
2647 v_cmp%E1\t%0, %2, %3"
|
|
2648 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
|
|
2649 (set_attr "length" "4,8,4,8,8")])
|
|
2650
|
|
2651 (define_expand "vcond_mask_<mode>di"
|
|
2652 [(parallel
|
|
2653 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
|
|
2654 (vec_merge:VEC_ALLREG_MODE
|
|
2655 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
|
|
2656 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
|
|
2657 (match_operand:DI 3 "register_operand" "")))
|
|
2658 (clobber (scratch:V64DI))])]
|
|
2659 ""
|
|
2660 "")
|
|
2661
|
|
2662 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>"
|
|
2663 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2664 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
|
|
2665 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
|
|
2666 (match_operator 3 "gcn_fp_compare_operator"
|
|
2667 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
|
|
2668 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])]
|
|
2669 ""
|
|
2670 {
|
|
2671 rtx tmp = gen_reg_rtx (DImode);
|
|
2672 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di
|
|
2673 (tmp, operands[3], operands[4], operands[5]));
|
|
2674 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
|
|
2675 (operands[0], operands[1], operands[2], tmp));
|
|
2676 DONE;
|
|
2677 })
|
|
2678
|
|
2679 (define_expand "vcond<VEC_ALLREG_MODE:mode><VEC_ALLREG_ALT:mode>_exec"
|
|
2680 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2681 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
|
|
2682 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
|
|
2683 (match_operator 3 "gcn_fp_compare_operator"
|
|
2684 [(match_operand:VEC_ALLREG_ALT 4 "gcn_alu_operand")
|
|
2685 (match_operand:VEC_ALLREG_ALT 5 "gcn_vop3_operand")])
|
|
2686 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
|
|
2687 ""
|
|
2688 {
|
|
2689 rtx tmp = gen_reg_rtx (DImode);
|
|
2690 emit_insn (gen_vec_cmp<VEC_ALLREG_ALT:mode>di_exec
|
|
2691 (tmp, operands[3], operands[4], operands[5], operands[6]));
|
|
2692 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
|
|
2693 (operands[0], operands[1], operands[2], tmp));
|
|
2694 DONE;
|
|
2695 })
|
|
2696
|
|
2697 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>"
|
|
2698 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2699 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
|
|
2700 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
|
|
2701 (match_operator 3 "gcn_fp_compare_operator"
|
|
2702 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
|
|
2703 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])]
|
|
2704 ""
|
|
2705 {
|
|
2706 rtx tmp = gen_reg_rtx (DImode);
|
|
2707 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di
|
|
2708 (tmp, operands[3], operands[4], operands[5]));
|
|
2709 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
|
|
2710 (operands[0], operands[1], operands[2], tmp));
|
|
2711 DONE;
|
|
2712 })
|
|
2713
|
|
2714 (define_expand "vcondu<VEC_ALLREG_MODE:mode><VEC_ALLREG_INT_MODE:mode>_exec"
|
|
2715 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2716 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand")
|
|
2717 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand")
|
|
2718 (match_operator 3 "gcn_fp_compare_operator"
|
|
2719 [(match_operand:VEC_ALLREG_INT_MODE 4 "gcn_alu_operand")
|
|
2720 (match_operand:VEC_ALLREG_INT_MODE 5 "gcn_vop3_operand")])
|
|
2721 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
|
|
2722 ""
|
|
2723 {
|
|
2724 rtx tmp = gen_reg_rtx (DImode);
|
|
2725 emit_insn (gen_vec_cmpu<VEC_ALLREG_INT_MODE:mode>di_exec
|
|
2726 (tmp, operands[3], operands[4], operands[5], operands[6]));
|
|
2727 emit_insn (gen_vcond_mask_<VEC_ALLREG_MODE:mode>di
|
|
2728 (operands[0], operands[1], operands[2], tmp));
|
|
2729 DONE;
|
|
2730 })
|
|
2731
|
|
2732 ;; }}}
|
|
2733 ;; {{{ Fully masked loop support
|
|
2734
|
|
2735 (define_expand "while_ultsidi"
|
|
2736 [(match_operand:DI 0 "register_operand")
|
|
2737 (match_operand:SI 1 "")
|
|
2738 (match_operand:SI 2 "")]
|
|
2739 ""
|
|
2740 {
|
|
2741 if (GET_CODE (operands[1]) != CONST_INT
|
|
2742 || GET_CODE (operands[2]) != CONST_INT)
|
|
2743 {
|
|
2744 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
|
|
2745 rtx tmp = _0_1_2_3;
|
|
2746 if (GET_CODE (operands[1]) != CONST_INT
|
|
2747 || INTVAL (operands[1]) != 0)
|
|
2748 {
|
|
2749 tmp = gen_reg_rtx (V64SImode);
|
|
2750 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
|
|
2751 }
|
|
2752 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
|
|
2753 gen_rtx_GT (VOIDmode, 0, 0),
|
|
2754 operands[2], tmp));
|
|
2755 }
|
|
2756 else
|
|
2757 {
|
|
2758 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
|
|
2759 HOST_WIDE_INT mask = (diff >= 64 ? -1
|
|
2760 : ~((unsigned HOST_WIDE_INT)-1 << diff));
|
|
2761 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
|
|
2762 }
|
|
2763 DONE;
|
|
2764 })
|
|
2765
|
|
2766 (define_expand "maskload<mode>di"
|
|
2767 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2768 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
|
|
2769 (match_operand 2 "")]
|
|
2770 ""
|
|
2771 {
|
|
2772 rtx exec = force_reg (DImode, operands[2]);
|
|
2773 rtx addr = gcn_expand_scalar_to_vector_address
|
|
2774 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
|
|
2775 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
|
|
2776 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
|
|
2777
|
|
2778 /* Masked lanes are required to hold zero. */
|
|
2779 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
|
|
2780
|
|
2781 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
|
|
2782 operands[0], exec));
|
|
2783 DONE;
|
|
2784 })
|
|
2785
|
|
2786 (define_expand "maskstore<mode>di"
|
|
2787 [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
|
|
2788 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
|
|
2789 (match_operand 2 "")]
|
|
2790 ""
|
|
2791 {
|
|
2792 rtx exec = force_reg (DImode, operands[2]);
|
|
2793 rtx addr = gcn_expand_scalar_to_vector_address
|
|
2794 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
|
|
2795 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
|
|
2796 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
|
|
2797 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
|
|
2798 DONE;
|
|
2799 })
|
|
2800
|
|
2801 (define_expand "mask_gather_load<mode>"
|
|
2802 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
|
|
2803 (match_operand:DI 1 "register_operand")
|
|
2804 (match_operand 2 "register_operand")
|
|
2805 (match_operand 3 "immediate_operand")
|
|
2806 (match_operand:SI 4 "gcn_alu_operand")
|
|
2807 (match_operand:DI 5 "")]
|
|
2808 ""
|
|
2809 {
|
|
2810 rtx exec = force_reg (DImode, operands[5]);
|
|
2811
|
|
2812 /* TODO: more conversions will be needed when more types are vectorized. */
|
|
2813 if (GET_MODE (operands[2]) == V64DImode)
|
|
2814 {
|
|
2815 rtx tmp = gen_reg_rtx (V64SImode);
|
|
2816 emit_insn (gen_truncv64div64si2_exec (tmp, operands[2],
|
|
2817 gcn_gen_undef (V64SImode),
|
|
2818 exec));
|
|
2819 operands[2] = tmp;
|
|
2820 }
|
|
2821
|
|
2822 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
|
|
2823 operands[2], operands[4],
|
|
2824 INTVAL (operands[3]), exec);
|
|
2825
|
|
2826 /* Masked lanes are required to hold zero. */
|
|
2827 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
|
|
2828
|
|
2829 if (GET_MODE (addr) == V64DImode)
|
|
2830 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
|
|
2831 const0_rtx, const0_rtx,
|
|
2832 const0_rtx, operands[0],
|
|
2833 exec));
|
|
2834 else
|
|
2835 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
|
|
2836 addr, const0_rtx,
|
|
2837 const0_rtx, const0_rtx,
|
|
2838 operands[0], exec));
|
|
2839 DONE;
|
|
2840 })
|
|
2841
|
|
2842 (define_expand "mask_scatter_store<mode>"
|
|
2843 [(match_operand:DI 0 "register_operand")
|
|
2844 (match_operand 1 "register_operand")
|
|
2845 (match_operand 2 "immediate_operand")
|
|
2846 (match_operand:SI 3 "gcn_alu_operand")
|
|
2847 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
|
|
2848 (match_operand:DI 5 "")]
|
|
2849 ""
|
|
2850 {
|
|
2851 rtx exec = force_reg (DImode, operands[5]);
|
|
2852
|
|
2853 /* TODO: more conversions will be needed when more types are vectorized. */
|
|
2854 if (GET_MODE (operands[1]) == V64DImode)
|
|
2855 {
|
|
2856 rtx tmp = gen_reg_rtx (V64SImode);
|
|
2857 emit_insn (gen_truncv64div64si2_exec (tmp, operands[1],
|
|
2858 gcn_gen_undef (V64SImode),
|
|
2859 exec));
|
|
2860 operands[1] = tmp;
|
|
2861 }
|
|
2862
|
|
2863 emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
|
|
2864 operands[3], operands[4], exec));
|
|
2865 DONE;
|
|
2866 })
|
|
2867
|
|
2868 ; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
|
|
2869 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
|
|
2870 (define_mode_iterator COND_INT_MODE [V64SI V64DI])
|
|
2871
|
|
2872 (define_code_iterator cond_op [plus minus])
|
|
2873
|
|
2874 (define_expand "cond_<expander><mode>"
|
|
2875 [(match_operand:COND_MODE 0 "register_operand")
|
|
2876 (match_operand:DI 1 "register_operand")
|
|
2877 (cond_op:COND_MODE
|
|
2878 (match_operand:COND_MODE 2 "gcn_alu_operand")
|
|
2879 (match_operand:COND_MODE 3 "gcn_alu_operand"))
|
|
2880 (match_operand:COND_MODE 4 "register_operand")]
|
|
2881 ""
|
|
2882 {
|
|
2883 operands[1] = force_reg (DImode, operands[1]);
|
|
2884 operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
2885
|
|
2886 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
|
|
2887 operands[3], operands[4],
|
|
2888 operands[1]));
|
|
2889 DONE;
|
|
2890 })
|
|
2891
|
|
2892 (define_code_iterator cond_bitop [and ior xor])
|
|
2893
|
|
2894 (define_expand "cond_<expander><mode>"
|
|
2895 [(match_operand:COND_INT_MODE 0 "register_operand")
|
|
2896 (match_operand:DI 1 "register_operand")
|
|
2897 (cond_bitop:COND_INT_MODE
|
|
2898 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
|
|
2899 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
|
|
2900 (match_operand:COND_INT_MODE 4 "register_operand")]
|
|
2901 ""
|
|
2902 {
|
|
2903 operands[1] = force_reg (DImode, operands[1]);
|
|
2904 operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
2905
|
|
2906 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
|
|
2907 operands[3], operands[4],
|
|
2908 operands[1]));
|
|
2909 DONE;
|
|
2910 })
|
|
2911
|
|
2912 ;; }}}
|
|
2913 ;; {{{ Vector reductions
|
|
2914
|
|
2915 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
|
|
2916 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
|
|
2917 UNSPEC_PLUS_DPP_SHR
|
|
2918 UNSPEC_AND_DPP_SHR
|
|
2919 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
|
|
2920
|
|
2921 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
|
|
2922 UNSPEC_AND_DPP_SHR
|
|
2923 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
|
|
2924
|
|
2925 ; FIXME: Isn't there a better way of doing this?
|
|
2926 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
|
|
2927 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
|
|
2928 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
|
|
2929 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
|
|
2930 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
|
|
2931 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
|
|
2932 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
|
|
2933 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
|
|
2934
|
|
2935 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
|
|
2936 (UNSPEC_SMAX_DPP_SHR "smax")
|
|
2937 (UNSPEC_UMIN_DPP_SHR "umin")
|
|
2938 (UNSPEC_UMAX_DPP_SHR "umax")
|
|
2939 (UNSPEC_PLUS_DPP_SHR "plus")
|
|
2940 (UNSPEC_AND_DPP_SHR "and")
|
|
2941 (UNSPEC_IOR_DPP_SHR "ior")
|
|
2942 (UNSPEC_XOR_DPP_SHR "xor")])
|
|
2943
|
|
2944 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
|
|
2945 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
|
|
2946 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
|
|
2947 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
|
|
2948 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
|
|
2949 (UNSPEC_AND_DPP_SHR "v_and%b0")
|
|
2950 (UNSPEC_IOR_DPP_SHR "v_or%b0")
|
|
2951 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
|
|
2952
|
|
2953 (define_expand "reduc_<reduc_op>_scal_<mode>"
|
|
2954 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
|
|
2955 (unspec:<SCALAR_MODE>
|
|
2956 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
|
|
2957 REDUC_UNSPEC))]
|
|
2958 ""
|
|
2959 {
|
|
2960 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
|
|
2961 <reduc_unspec>);
|
|
2962
|
|
2963 /* The result of the reduction is in lane 63 of tmp. */
|
|
2964 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
|
|
2965
|
|
2966 DONE;
|
|
2967 })
|
|
2968
|
|
2969 (define_expand "reduc_<reduc_op>_scal_v64di"
|
|
2970 [(set (match_operand:DI 0 "register_operand")
|
|
2971 (unspec:DI
|
|
2972 [(match_operand:V64DI 1 "register_operand")]
|
|
2973 REDUC_2REG_UNSPEC))]
|
|
2974 ""
|
|
2975 {
|
|
2976 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
|
|
2977 <reduc_unspec>);
|
|
2978
|
|
2979 /* The result of the reduction is in lane 63 of tmp. */
|
|
2980 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
|
|
2981
|
|
2982 DONE;
|
|
2983 })
|
|
2984
|
|
2985 (define_insn "*<reduc_op>_dpp_shr_<mode>"
|
|
2986 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
|
|
2987 (unspec:VEC_1REG_MODE
|
|
2988 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
|
|
2989 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
|
|
2990 (match_operand:SI 3 "const_int_operand" "n")]
|
|
2991 REDUC_UNSPEC))]
|
|
2992 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
|
|
2993 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
|
|
2994 {
|
|
2995 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
|
|
2996 <reduc_unspec>, INTVAL (operands[3]));
|
|
2997 }
|
|
2998 [(set_attr "type" "vop_dpp")
|
|
2999 (set_attr "length" "8")])
|
|
3000
|
|
3001 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
|
|
3002 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
3003 (unspec:V64DI
|
|
3004 [(match_operand:V64DI 1 "register_operand" "v0")
|
|
3005 (match_operand:V64DI 2 "register_operand" "v0")
|
|
3006 (match_operand:SI 3 "const_int_operand" "n")]
|
|
3007 REDUC_2REG_UNSPEC))]
|
|
3008 ""
|
|
3009 "#"
|
|
3010 "reload_completed"
|
|
3011 [(set (match_dup 4)
|
|
3012 (unspec:V64SI
|
|
3013 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
|
|
3014 (set (match_dup 5)
|
|
3015 (unspec:V64SI
|
|
3016 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
|
|
3017 {
|
|
3018 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
|
|
3019 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
|
|
3020 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
|
|
3021 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
|
|
3022 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
|
|
3023 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
|
|
3024 }
|
|
3025 [(set_attr "type" "vmult")
|
|
3026 (set_attr "length" "16")])
|
|
3027
|
|
3028 ; Special cases for addition.
|
|
3029
|
|
3030 (define_insn "*plus_carry_dpp_shr_v64si"
|
|
3031 [(set (match_operand:V64SI 0 "register_operand" "=v")
|
|
3032 (unspec:V64SI
|
|
3033 [(match_operand:V64SI 1 "register_operand" "v")
|
|
3034 (match_operand:V64SI 2 "register_operand" "v")
|
|
3035 (match_operand:SI 3 "const_int_operand" "n")]
|
|
3036 UNSPEC_PLUS_CARRY_DPP_SHR))
|
|
3037 (clobber (reg:DI VCC_REG))]
|
|
3038 ""
|
|
3039 {
|
|
3040 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
|
|
3041 return gcn_expand_dpp_shr_insn (V64SImode, insn,
|
|
3042 UNSPEC_PLUS_CARRY_DPP_SHR,
|
|
3043 INTVAL (operands[3]));
|
|
3044 }
|
|
3045 [(set_attr "type" "vop_dpp")
|
|
3046 (set_attr "length" "8")])
|
|
3047
|
|
3048 (define_insn "*plus_carry_in_dpp_shr_v64si"
|
|
3049 [(set (match_operand:V64SI 0 "register_operand" "=v")
|
|
3050 (unspec:V64SI
|
|
3051 [(match_operand:V64SI 1 "register_operand" "v")
|
|
3052 (match_operand:V64SI 2 "register_operand" "v")
|
|
3053 (match_operand:SI 3 "const_int_operand" "n")
|
|
3054 (match_operand:DI 4 "register_operand" "cV")]
|
|
3055 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
|
|
3056 (clobber (reg:DI VCC_REG))]
|
|
3057 ""
|
|
3058 {
|
|
3059 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
|
|
3060 return gcn_expand_dpp_shr_insn (V64SImode, insn,
|
|
3061 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
|
|
3062 INTVAL (operands[3]));
|
|
3063 }
|
|
3064 [(set_attr "type" "vop_dpp")
|
|
3065 (set_attr "length" "8")])
|
|
3066
|
|
3067 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
|
|
3068 [(set (match_operand:V64DI 0 "register_operand" "=&v")
|
|
3069 (unspec:V64DI
|
|
3070 [(match_operand:V64DI 1 "register_operand" "v0")
|
|
3071 (match_operand:V64DI 2 "register_operand" "v0")
|
|
3072 (match_operand:SI 3 "const_int_operand" "n")]
|
|
3073 UNSPEC_PLUS_CARRY_DPP_SHR))
|
|
3074 (clobber (reg:DI VCC_REG))]
|
|
3075 ""
|
|
3076 "#"
|
|
3077 "reload_completed"
|
|
3078 [(parallel [(set (match_dup 4)
|
|
3079 (unspec:V64SI
|
|
3080 [(match_dup 6) (match_dup 8) (match_dup 3)]
|
|
3081 UNSPEC_PLUS_CARRY_DPP_SHR))
|
|
3082 (clobber (reg:DI VCC_REG))])
|
|
3083 (parallel [(set (match_dup 5)
|
|
3084 (unspec:V64SI
|
|
3085 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
|
|
3086 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
|
|
3087 (clobber (reg:DI VCC_REG))])]
|
|
3088 {
|
|
3089 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
|
|
3090 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
|
|
3091 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
|
|
3092 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
|
|
3093 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
|
|
3094 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
|
|
3095 }
|
|
3096 [(set_attr "type" "vmult")
|
|
3097 (set_attr "length" "16")])
|
|
3098
|
|
3099 ; Instructions to move a scalar value from lane 63 of a vector register.
|
|
3100 (define_insn "mov_from_lane63_<mode>"
|
|
3101 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
|
|
3102 (unspec:<SCALAR_MODE>
|
|
3103 [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
|
|
3104 UNSPEC_MOV_FROM_LANE63))]
|
|
3105 ""
|
|
3106 "@
|
|
3107 v_readlane_b32\t%0, %1, 63
|
|
3108 v_mov_b32\t%0, %1 wave_ror:1"
|
|
3109 [(set_attr "type" "vop3a,vop_dpp")
|
|
3110 (set_attr "exec" "none,*")
|
|
3111 (set_attr "length" "8")])
|
|
3112
|
|
3113 (define_insn "mov_from_lane63_v64di"
|
|
3114 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
|
|
3115 (unspec:DI
|
|
3116 [(match_operand:V64DI 1 "register_operand" "v,v")]
|
|
3117 UNSPEC_MOV_FROM_LANE63))]
|
|
3118 ""
|
|
3119 "@
|
|
3120 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
|
|
3121 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
|
|
3122 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
|
|
3123 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
|
|
3124 else \
|
|
3125 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
|
|
3126 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
|
|
3127 [(set_attr "type" "vop3a,vop_dpp")
|
|
3128 (set_attr "exec" "none,*")
|
|
3129 (set_attr "length" "8")])
|
|
3130
|
|
3131 ;; }}}
|
|
3132 ;; {{{ Miscellaneous
|
|
3133
|
|
3134 (define_expand "vec_seriesv64si"
|
|
3135 [(match_operand:V64SI 0 "register_operand")
|
|
3136 (match_operand:SI 1 "gcn_alu_operand")
|
|
3137 (match_operand:SI 2 "gcn_alu_operand")]
|
|
3138 ""
|
|
3139 {
|
|
3140 rtx tmp = gen_reg_rtx (V64SImode);
|
|
3141 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
|
|
3142
|
|
3143 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
|
|
3144 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
|
|
3145 DONE;
|
|
3146 })
|
|
3147
|
|
3148 (define_expand "vec_seriesv64di"
|
|
3149 [(match_operand:V64DI 0 "register_operand")
|
|
3150 (match_operand:DI 1 "gcn_alu_operand")
|
|
3151 (match_operand:DI 2 "gcn_alu_operand")]
|
|
3152 ""
|
|
3153 {
|
|
3154 rtx tmp = gen_reg_rtx (V64DImode);
|
|
3155 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
|
|
3156 rtx op1vec = gen_reg_rtx (V64DImode);
|
|
3157
|
|
3158 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
|
|
3159 emit_insn (gen_vec_duplicatev64si (op1vec, operands[1]));
|
|
3160 emit_insn (gen_addv64di3 (operands[0], tmp, op1vec));
|
|
3161 DONE;
|
|
3162 })
|
|
3163
|
|
3164 ;; }}}
|