comparison gcc/config/aarch64/aarch64-sve2.md @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents
children
comparison
equal deleted inserted replaced
131:84e7813d76e9 145:1830386684a0
1 ;; Machine description for AArch64 SVE2.
2 ;; Copyright (C) 2019-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 ;; The file is organised into the following sections (search for the full
22 ;; line):
23 ;;
24 ;; == Moves
25 ;; ---- Non-temporal gather loads
26 ;; ---- Non-temporal scatter stores
27 ;;
28 ;; == Uniform binary arithmetic
29 ;; ---- [INT] Multiplication
30 ;; ---- [INT] Scaled high-part multiplication
31 ;; ---- [INT] General binary arithmetic that maps to unspecs
32 ;; ---- [INT] Saturating binary arithmetic
33 ;; ---- [INT] Saturating left shifts
34 ;;
35 ;; == Uniform ternary arithmetic
36 ;; ---- [INT] General ternary arithmetic that maps to unspecs
37 ;; ---- [INT] Multiply-and-accumulate operations
38 ;; ---- [INT] Binary logic operations with rotation
39 ;; ---- [INT] Ternary logic operations
40 ;; ---- [INT] Shift-and-accumulate operations
41 ;; ---- [INT] Shift-and-insert operations
42 ;; ---- [INT] Sum of absolute differences
43 ;;
44 ;; == Extending arithmetic
45 ;; ---- [INT] Wide binary arithmetic
46 ;; ---- [INT] Long binary arithmetic
47 ;; ---- [INT] Long left shifts
48 ;; ---- [INT] Long binary arithmetic with accumulation
49 ;; ---- [FP] Long multiplication with accumulation
50 ;;
51 ;; == Narrowing arithmetic
52 ;; ---- [INT] Narrowing unary arithmetic
53 ;; ---- [INT] Narrowing binary arithmetic
54 ;; ---- [INT] Narrowing right shifts
55 ;;
56 ;; == Pairwise arithmetic
57 ;; ---- [INT] Pairwise arithmetic
58 ;; ---- [FP] Pairwise arithmetic
59 ;; ---- [INT] Pairwise arithmetic with accumulation
60 ;;
61 ;; == Complex arithmetic
62 ;; ---- [INT] Complex binary operations
63 ;; ---- [INT] Complex ternary operations
64 ;; ---- [INT] Complex dot product
65 ;;
66 ;; == Conversions
67 ;; ---- [FP<-FP] Widening conversions
68 ;; ---- [FP<-FP] Narrowing conversions
69 ;;
70 ;; == Other arithmetic
71 ;; ---- [INT] Reciprocal approximation
72 ;; ---- [INT<-FP] Base-2 logarithm
73 ;; ---- [INT] Polynomial multiplication
74 ;;
75 ;; == Permutation
76 ;; ---- [INT,FP] General permutes
77 ;; ---- [INT] Optional bit-permute extensions
78 ;;
79 ;; == General
80 ;; ---- Check for aliases between pointers
81 ;; ---- Histogram processing
82 ;; ---- String matching
83 ;;
84 ;; == Cryptographic extensions
85 ;; ---- Optional AES extensions
86 ;; ---- Optional SHA-3 extensions
87 ;; ---- Optional SM4 extensions
88
89 ;; =========================================================================
90 ;; == Moves
91 ;; =========================================================================
92
93 ;; -------------------------------------------------------------------------
94 ;; ---- Non-temporal gather loads
95 ;; -------------------------------------------------------------------------
96 ;; Includes gather forms of:
97 ;; - LDNT1B
98 ;; - LDNT1D
99 ;; - LDNT1H
100 ;; - LDNT1W
101 ;; -------------------------------------------------------------------------
102
103 ;; Non-extending loads.
;; Operand 1 is the governing predicate, operand 3 the vector of addresses
;; and operand 2 an optional scalar offset (the "Z" alternative omits it).
104 (define_insn "@aarch64_gather_ldnt<mode>"
105 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
106 (unspec:SVE_FULL_SD
107 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
108 (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
109 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
110 (mem:BLK (scratch))]
111 UNSPEC_LDNT1_GATHER))]
112 "TARGET_SVE2"
113 "@
114 ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
115 ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
116 )
117
118 ;; Extending loads.
;; Operand 4 is the predicate for the PRED_X wrapper around the extension;
;; the rewrite canonicalizes it to an all-true constant once it is known
;; not to matter.  The mode condition restricts SVE_PARTIAL_I to element
;; sizes strictly narrower than the SVE_FULL_SDI destination elements.
119 (define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
120 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
121 (unspec:SVE_FULL_SDI
122 [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
123 (ANY_EXTEND:SVE_FULL_SDI
124 (unspec:SVE_PARTIAL_I
125 [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
126 (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
127 (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
128 (mem:BLK (scratch))]
129 UNSPEC_LDNT1_GATHER))]
130 UNSPEC_PRED_X))]
131 "TARGET_SVE2
132 && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
133 "@
134 ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
135 ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
136 "&& !CONSTANT_P (operands[4])"
137 {
138 operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
139 }
140 )
141
142 ;; -------------------------------------------------------------------------
143 ;; ---- Non-temporal scatter stores
144 ;; -------------------------------------------------------------------------
145 ;; Includes scatter forms of:
146 ;; - STNT1B
147 ;; - STNT1D
148 ;; - STNT1H
149 ;; - STNT1W
150 ;; -------------------------------------------------------------------------
151
152 ;; Non-truncating stores.
;; Operand 0 is the governing predicate, operand 2 the vector of addresses,
;; operand 3 the data and operand 1 an optional scalar offset ("Z" omits it).
;; STNT1 scatter stores are SVE2 instructions, so the insn must be gated on
;; TARGET_SVE2 (as the matching gather load and truncating scatter are),
;; not plain TARGET_SVE.
153 (define_insn "@aarch64_scatter_stnt<mode>"
154 [(set (mem:BLK (scratch))
155 (unspec:BLK
156 [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
157 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
158 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
159 (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
160
161 UNSPEC_STNT1_SCATTER))]
162 "TARGET_SVE2"
163 "@
164 stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
165 stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
166 )
167
168 ;; Truncating stores.
;; Truncate SVE_FULL_SDI data (operand 3) to SVE_PARTIAL_I elements before
;; storing; the mode condition requires the stored elements to be strictly
;; narrower than the container elements.
169 (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
170 [(set (mem:BLK (scratch))
171 (unspec:BLK
172 [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
173 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
174 (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
175 (truncate:SVE_PARTIAL_I
176 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
177 UNSPEC_STNT1_SCATTER))]
178 "TARGET_SVE2
179 && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
180 "@
181 stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
182 stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
183 )
184
185 ;; =========================================================================
186 ;; == Uniform binary arithmetic
187 ;; =========================================================================
188
189 ;; -------------------------------------------------------------------------
190 ;; ---- [INT] Multiplication
191 ;; -------------------------------------------------------------------------
192 ;; Includes the lane forms of:
193 ;; - MUL
194 ;; -------------------------------------------------------------------------
195
;; Multiply each element of operand 1 by the lane of operand 2 selected
;; by the constant in operand 3.
196 (define_insn "@aarch64_mul_lane_<mode>"
197 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
198 (mult:SVE_FULL_HSDI
199 (unspec:SVE_FULL_HSDI
200 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
201 (match_operand:SI 3 "const_int_operand")]
202 UNSPEC_SVE_LANE_SELECT)
203 (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
204 "TARGET_SVE2"
205 "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
206 )
207
208 ;; -------------------------------------------------------------------------
209 ;; ---- [INT] Scaled high-part multiplication
210 ;; -------------------------------------------------------------------------
211 ;; The patterns in this section are synthetic.
212 ;; -------------------------------------------------------------------------
213
214 ;; Unpredicated integer multiply-high-with-(round-and-)scale.
;; Expanded entirely by hand: widen with <su>MULLB/<su>MULLT, then narrow
;; the bottom and top halves with (R)SHRNB/(R)SHRNT by one less than the
;; element width, giving the (rounded) high half of the scaled product.
;; DONE means the pattern's own RTL template is never emitted.
215 (define_expand "<su>mulh<r>s<mode>3"
216 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
217 (unspec:SVE_FULL_BHSI
218 [(match_dup 3)
219 (unspec:SVE_FULL_BHSI
220 [(match_operand:SVE_FULL_BHSI 1 "register_operand")
221 (match_operand:SVE_FULL_BHSI 2 "register_operand")]
222 MULHRS)]
223 UNSPEC_PRED_X))]
224 "TARGET_SVE2"
225 {
226 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
227
228 rtx prod_b = gen_reg_rtx (<VWIDE>mode);
229 rtx prod_t = gen_reg_rtx (<VWIDE>mode);
230 emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
231 operands[2]));
232 emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
233 operands[2]));
234
235 rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
236 emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
237 emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
238 prod_t, shift));
239
240 DONE;
241 }
242 )
243
244 ;; -------------------------------------------------------------------------
245 ;; ---- [INT] General binary arithmetic that maps to unspecs
246 ;; -------------------------------------------------------------------------
247 ;; Includes:
248 ;; - SHADD
249 ;; - SHSUB
250 ;; - SHSUBR
251 ;; - SQRSHL
252 ;; - SQRSHLR
253 ;; - SRHADD
254 ;; - SRSHL
255 ;; - SRSHLR
256 ;; - SUQADD
257 ;; - UHADD
258 ;; - UHSUB
259 ;; - UHSUBR
260 ;; - UQRSHL
261 ;; - UQRSHLR
262 ;; - URHADD
263 ;; - URSHL
264 ;; - URSHLR
265 ;; - USQADD
266 ;; -------------------------------------------------------------------------
267
268 ;; Integer average (floor).
;; Halving add (SHADD/UHADD) under an all-true predicate; operand 3 is
;; filled in with a constant-all-ones predicate forced into a register.
269 (define_expand "<u>avg<mode>3_floor"
270 [(set (match_operand:SVE_FULL_I 0 "register_operand")
271 (unspec:SVE_FULL_I
272 [(match_dup 3)
273 (unspec:SVE_FULL_I
274 [(match_operand:SVE_FULL_I 1 "register_operand")
275 (match_operand:SVE_FULL_I 2 "register_operand")]
276 HADD)]
277 UNSPEC_PRED_X))]
278 "TARGET_SVE2"
279 {
280 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
281 }
282 )
283
284 ;; Integer average (rounding).
;; Rounding halving add (SRHADD/URHADD); structured identically to
;; <u>avg<mode>3_floor but using the RHADD unspecs.
285 (define_expand "<u>avg<mode>3_ceil"
286 [(set (match_operand:SVE_FULL_I 0 "register_operand")
287 (unspec:SVE_FULL_I
288 [(match_dup 3)
289 (unspec:SVE_FULL_I
290 [(match_operand:SVE_FULL_I 1 "register_operand")
291 (match_operand:SVE_FULL_I 2 "register_operand")]
292 RHADD)]
293 UNSPEC_PRED_X))]
294 "TARGET_SVE2"
295 {
296 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
297 }
298 )
299
300 ;; The immediate form of SQADD acts as an immediate form of SUQADD
301 ;; over its full range. In contrast to the ss_plus pattern, we do
302 ;; not need to treat byte immediates specially. E.g.:
303 ;;
304 ;; SQADD Z0.B, Z0.B, #128
305 ;;
306 ;; is equivalent to:
307 ;;
308 ;; MOV Z1.B, #128
309 ;; SUQADD Z0.B, P0/M, Z0.B, Z1.B
310 ;;
311 ;; even though it's not equivalent to:
312 ;;
313 ;; MOV Z1.B, #128
314 ;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128
;; SUQADD with an immediate addend, implemented as an immediate SQADD
;; (see the comment above for why the two are interchangeable here).
;; Second alternative uses MOVPRFX when operand 0 is not tied to operand 1.
315 (define_insn "@aarch64_sve_suqadd<mode>_const"
316 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
317 (unspec:SVE_FULL_I
318 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
319 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
320 UNSPEC_SUQADD))]
321 "TARGET_SVE2"
322 "@
323 sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
324 movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
325 [(set_attr "movprfx" "*,yes")]
326 )
327
328 ;; General predicated binary arithmetic. All operations handled here
329 ;; are commutative or have a reversed form.
;; Three alternatives: destination tied to operand 2 (normal form), tied
;; to operand 3 (reversed mnemonic <sve_int_op_rev>), or neither, in which
;; case MOVPRFX copies operand 2 into the destination first.
330 (define_insn "@aarch64_pred_<sve_int_op><mode>"
331 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
332 (unspec:SVE_FULL_I
333 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
334 (unspec:SVE_FULL_I
335 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
336 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
337 SVE2_COND_INT_BINARY_REV)]
338 UNSPEC_PRED_X))]
339 "TARGET_SVE2"
340 "@
341 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
342 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
343 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
344 [(set_attr "movprfx" "*,*,yes")]
345 )
346
347 ;; Predicated binary arithmetic with merging.
;; Predicated binary arithmetic with merging.  Operand 5 is the governing
;; predicate of the inner UNSPEC_PRED_X, which has mode <VPRED>, so the
;; all-ones dummy must be created in <VPRED>mode (as the sibling patterns
;; below do), not in the vector mode <MODE>.
348 (define_expand "@cond_<sve_int_op><mode>"
349 [(set (match_operand:SVE_FULL_I 0 "register_operand")
350 (unspec:SVE_FULL_I
351 [(match_operand:<VPRED> 1 "register_operand")
352 (unspec:SVE_FULL_I
353 [(match_dup 5)
354 (unspec:SVE_FULL_I
355 [(match_operand:SVE_FULL_I 2 "register_operand")
356 (match_operand:SVE_FULL_I 3 "register_operand")]
357 SVE2_COND_INT_BINARY)]
358 UNSPEC_PRED_X)
359 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
360 UNSPEC_SEL))]
361 "TARGET_SVE2"
362 {
363 operands[5] = CONSTM1_RTX (<VPRED>mode);
364 }
365 )
366
367 ;; Predicated binary arithmetic, merging with the first input.
;; Merge with the first input (the UNSPEC_SEL else-value is a dup of
;; operand 2).  Operand 4 is the inner PRED_X predicate; the rewrite
;; replaces it with an all-true constant once it is no longer needed.
368 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
369 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
370 (unspec:SVE_FULL_I
371 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
372 (unspec:SVE_FULL_I
373 [(match_operand 4)
374 (unspec:SVE_FULL_I
375 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
376 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
377 SVE2_COND_INT_BINARY)]
378 UNSPEC_PRED_X)
379 (match_dup 2)]
380 UNSPEC_SEL))]
381 "TARGET_SVE2"
382 "@
383 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
384 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
385 "&& !CONSTANT_P (operands[4])"
386 {
387 operands[4] = CONSTM1_RTX (<VPRED>mode);
388 }
389 [(set_attr "movprfx" "*,yes")]
390 )
391
392 ;; Predicated binary arithmetic, merging with the second input.
;; Merge with the second input (else-value dups operand 3), so the
;; reversed mnemonic <sve_int_op_rev> is used with the destination tied
;; to operand 3.
393 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
394 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
395 (unspec:SVE_FULL_I
396 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
397 (unspec:SVE_FULL_I
398 [(match_operand 4)
399 (unspec:SVE_FULL_I
400 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
401 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
402 SVE2_COND_INT_BINARY_REV)]
403 UNSPEC_PRED_X)
404 (match_dup 3)]
405 UNSPEC_SEL))]
406 "TARGET_SVE2"
407 "@
408 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
409 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
410 "&& !CONSTANT_P (operands[4])"
411 {
412 operands[4] = CONSTM1_RTX (<VPRED>mode);
413 }
414 [(set_attr "movprfx" "*,yes")]
415 )
416
417 ;; Predicated binary operations, merging with an independent value.
;; Merge with a value (operand 4) independent of both inputs.  The first
;; three alternatives merge with zero via zeroing MOVPRFX; the fourth
;; merges with operand 4 via merging MOVPRFX; the last ("#") is split
;; after reload into a vcond_mask copy followed by this pattern with the
;; destination reused as both input and else-value.
418 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
419 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
420 (unspec:SVE_FULL_I
421 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
422 (unspec:SVE_FULL_I
423 [(match_operand 5)
424 (unspec:SVE_FULL_I
425 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
426 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
427 SVE2_COND_INT_BINARY_REV)]
428 UNSPEC_PRED_X)
429 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
430 UNSPEC_SEL))]
431 "TARGET_SVE2
432 && !rtx_equal_p (operands[2], operands[4])
433 && !rtx_equal_p (operands[3], operands[4])"
434 "@
435 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
436 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
437 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
438 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
439 #"
440 "&& 1"
441 {
442 if (reload_completed
443 && register_operand (operands[4], <MODE>mode)
444 && !rtx_equal_p (operands[0], operands[4]))
445 {
446 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
447 operands[4], operands[1]));
448 operands[4] = operands[2] = operands[0];
449 }
450 else if (!CONSTANT_P (operands[5]))
451 operands[5] = CONSTM1_RTX (<VPRED>mode);
452 else
453 FAIL;
454 }
455 [(set_attr "movprfx" "yes")]
456 )
457
458 ;; Predicated binary operations with no reverse form, merging with zero.
459 ;; At present we don't generate these patterns via a cond_* optab,
460 ;; so there's no correctness requirement to handle merging with an
461 ;; independent value.
;; Operations without a reversed form, merging with zero only (operand 4
;; must be an immediate-zero vector); implemented with zeroing MOVPRFX.
462 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
463 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
464 (unspec:SVE_FULL_I
465 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
466 (unspec:SVE_FULL_I
467 [(match_operand 5)
468 (unspec:SVE_FULL_I
469 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
470 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
471 SVE2_COND_INT_BINARY_NOREV)]
472 UNSPEC_PRED_X)
473 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
474 UNSPEC_SEL))]
475 "TARGET_SVE2"
476 "@
477 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
478 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
479 "&& !CONSTANT_P (operands[5])"
480 {
481 operands[5] = CONSTM1_RTX (<VPRED>mode);
482 }
483 [(set_attr "movprfx" "yes")]
484 )
485
486 ;; -------------------------------------------------------------------------
487 ;; ---- [INT] Saturating binary arithmetic
488 ;; -------------------------------------------------------------------------
489 ;; Includes:
490 ;; - SQDMULH
491 ;; - SQRDMULH
492 ;; -------------------------------------------------------------------------
493
;; Unpredicated saturating doubling multiply-high (SQDMULH/SQRDMULH).
494 (define_insn "@aarch64_sve_<sve_int_op><mode>"
495 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
496 (unspec:SVE_FULL_I
497 [(match_operand:SVE_FULL_I 1 "register_operand" "w")
498 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
499 SVE2_INT_BINARY))]
500 "TARGET_SVE2"
501 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
502 )
503
;; Lane form: operand 2's lane is selected by the constant in operand 3.
504 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
505 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
506 (unspec:SVE_FULL_HSDI
507 [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
508 (unspec:SVE_FULL_HSDI
509 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
510 (match_operand:SI 3 "const_int_operand")]
511 UNSPEC_SVE_LANE_SELECT)]
512 SVE2_INT_BINARY_LANE))]
513 "TARGET_SVE2"
514 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
515 )
516
517 ;; -------------------------------------------------------------------------
518 ;; ---- [INT] Saturating left shifts
519 ;; -------------------------------------------------------------------------
520 ;; Includes:
521 ;; - SQSHL
522 ;; - SQSHLR
523 ;; - UQSHL
524 ;; - UQSHLR
525 ;; -------------------------------------------------------------------------
526
527 ;; Predicated left shifts.
;; Five alternatives: immediate shift, register shift, reversed register
;; shift (destination tied to the shift amount, operand 3), and MOVPRFX
;; versions of the first two.
528 (define_insn "@aarch64_pred_<sve_int_op><mode>"
529 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
530 (unspec:SVE_FULL_I
531 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
532 (unspec:SVE_FULL_I
533 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
534 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
535 SVE2_COND_INT_SHIFT)]
536 UNSPEC_PRED_X))]
537 "TARGET_SVE2"
538 "@
539 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
540 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
541 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
542 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
543 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
544 [(set_attr "movprfx" "*,*,*,yes,yes")]
545 )
546
547 ;; Predicated left shifts with merging.
;; Merging form of the predicated shifts; operand 5 (the PRED_X
;; predicate) is instantiated as an all-true constant.
548 (define_expand "@cond_<sve_int_op><mode>"
549 [(set (match_operand:SVE_FULL_I 0 "register_operand")
550 (unspec:SVE_FULL_I
551 [(match_operand:<VPRED> 1 "register_operand")
552 (unspec:SVE_FULL_I
553 [(match_dup 5)
554 (unspec:SVE_FULL_I
555 [(match_operand:SVE_FULL_I 2 "register_operand")
556 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
557 SVE2_COND_INT_SHIFT)]
558 UNSPEC_PRED_X)
559 (match_operand:SVE_FULL_I 4 "register_operand")]
560 UNSPEC_SEL))]
561 "TARGET_SVE2"
562 {
563 operands[5] = CONSTM1_RTX (<VPRED>mode);
564 }
565 )
566
567 ;; Predicated left shifts, merging with the first input.
;; Shift merging with the first input; immediate and register shift
;; amounts each have a direct and a MOVPRFX alternative.
568 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
569 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
570 (unspec:SVE_FULL_I
571 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
572 (unspec:SVE_FULL_I
573 [(match_operand 4)
574 (unspec:SVE_FULL_I
575 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
576 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
577 SVE2_COND_INT_SHIFT)]
578 UNSPEC_PRED_X)
579 (match_dup 2)]
580 UNSPEC_SEL))]
581 "TARGET_SVE2"
582 "@
583 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
584 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
585 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
586 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
587 "&& !CONSTANT_P (operands[4])"
588 {
589 operands[4] = CONSTM1_RTX (<VPRED>mode);
590 }
591 [(set_attr "movprfx" "*,*,yes,yes")]
592 )
593
594 ;; Predicated left shifts, merging with the second input.
;; Shift merging with the second input (the shift amount), using the
;; reversed mnemonic <sve_int_op>r; register shift amounts only.
595 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
596 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
597 (unspec:SVE_FULL_I
598 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
599 (unspec:SVE_FULL_I
600 [(match_operand 4)
601 (unspec:SVE_FULL_I
602 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
603 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
604 SVE2_COND_INT_SHIFT)]
605 UNSPEC_PRED_X)
606 (match_dup 3)]
607 UNSPEC_SEL))]
608 "TARGET_SVE2"
609 "@
610 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
611 movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
612 "&& !CONSTANT_P (operands[4])"
613 {
614 operands[4] = CONSTM1_RTX (<VPRED>mode);
615 }
616 [(set_attr "movprfx" "*,yes")]
617 )
618
619 ;; Predicated left shifts, merging with an independent value.
;; Shift merging with an independent value, combining the immediate /
;; register / reversed variants with zero, tied and general else-values;
;; the "#" alternatives are split after reload via vcond_mask as in
;; *cond_<sve_int_op><mode>_any above.
620 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
621 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
622 (unspec:SVE_FULL_I
623 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
624 (unspec:SVE_FULL_I
625 [(match_operand 5)
626 (unspec:SVE_FULL_I
627 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
628 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
629 SVE2_COND_INT_SHIFT)]
630 UNSPEC_PRED_X)
631 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
632 UNSPEC_SEL))]
633 "TARGET_SVE2
634 && !rtx_equal_p (operands[2], operands[4])
635 && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
636 "@
637 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
638 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
639 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
640 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
641 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
642 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
643 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
644 #
645 #"
646 "&& 1"
647 {
648 if (reload_completed
649 && register_operand (operands[4], <MODE>mode)
650 && !rtx_equal_p (operands[0], operands[4]))
651 {
652 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
653 operands[4], operands[1]));
654 operands[4] = operands[2] = operands[0];
655 }
656 else if (!CONSTANT_P (operands[5]))
657 operands[5] = CONSTM1_RTX (<VPRED>mode);
658 else
659 FAIL;
660 }
661 [(set_attr "movprfx" "yes")]
662 )
663
664 ;; =========================================================================
665 ;; == Uniform ternary arithmetic
666 ;; =========================================================================
667
668 ;; -------------------------------------------------------------------------
669 ;; ---- [INT] General ternary arithmetic that maps to unspecs
670 ;; -------------------------------------------------------------------------
671 ;; Includes:
672 ;; - ADCLB
673 ;; - ADCLT
674 ;; - EORBT
675 ;; - EORTB
676 ;; - SBCLB
677 ;; - SBCLT
678 ;; - SQRDMLAH
679 ;; - SQRDMLSH
680 ;; -------------------------------------------------------------------------
681
;; General ternary arithmetic (accumulator in operand 1, tied to the
;; destination in the first alternative).  The second alternative emits
;; MOVPRFX, so the pattern needs the "movprfx" attribute that the other
;; two-alternative patterns in this file carry; without it the MOVPRFX
;; alternative's length is miscounted.
682 (define_insn "@aarch64_sve_<sve_int_op><mode>"
683 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
684 (unspec:SVE_FULL_I
685 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
686 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
687 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
688 SVE2_INT_TERNARY))]
689 "TARGET_SVE2"
690 "@
691 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
692 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
693 [(set_attr "movprfx" "*,yes")]
694 )
694
;; Lane form of the ternary operations; operand 3's lane is selected by
;; the constant in operand 4.  As with the non-lane form, the MOVPRFX
;; alternative requires the "movprfx" attribute so its length is
;; accounted for correctly.
695 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
696 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
697 (unspec:SVE_FULL_HSDI
698 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
699 (unspec:SVE_FULL_HSDI
700 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
701 (match_operand:SI 4 "const_int_operand")]
702 UNSPEC_SVE_LANE_SELECT)
703 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
704 SVE2_INT_TERNARY_LANE))]
705 "TARGET_SVE2"
706 "@
707 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
708 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
709 [(set_attr "movprfx" "*,yes")]
710 )
710
711 ;; -------------------------------------------------------------------------
712 ;; ---- [INT] Multiply-and-accumulate operations
713 ;; -------------------------------------------------------------------------
714 ;; Includes the lane forms of:
715 ;; - MLA
716 ;; - MLS
717 ;; -------------------------------------------------------------------------
718
;; MLA: accumulator (operand 1) plus operand 2 times the selected lane of
;; operand 3.
719 (define_insn "@aarch64_sve_add_mul_lane_<mode>"
720 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
721 (plus:SVE_FULL_HSDI
722 (mult:SVE_FULL_HSDI
723 (unspec:SVE_FULL_HSDI
724 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
725 (match_operand:SI 4 "const_int_operand")]
726 UNSPEC_SVE_LANE_SELECT)
727 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
728 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
729 "TARGET_SVE2"
730 "@
731 mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
732 movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
733 [(set_attr "movprfx" "*,yes")]
734 )
735
;; MLS: accumulator (operand 1) minus operand 2 times the selected lane
;; of operand 3.
736 (define_insn "@aarch64_sve_sub_mul_lane_<mode>"
737 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
738 (minus:SVE_FULL_HSDI
739 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
740 (mult:SVE_FULL_HSDI
741 (unspec:SVE_FULL_HSDI
742 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
743 (match_operand:SI 4 "const_int_operand")]
744 UNSPEC_SVE_LANE_SELECT)
745 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
746 "TARGET_SVE2"
747 "@
748 mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
749 movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
750 [(set_attr "movprfx" "*,yes")]
751 )
752
753 ;; -------------------------------------------------------------------------
754 ;; ---- [INT] Binary logic operations with rotation
755 ;; -------------------------------------------------------------------------
756 ;; Includes:
757 ;; - XAR
758 ;; -------------------------------------------------------------------------
759
;; XAR: XOR of operands 1 and 2 (commutative, hence "%0"), then rotate
;; right by the immediate in operand 3.
760 (define_insn "@aarch64_sve2_xar<mode>"
761 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
762 (rotatert:SVE_FULL_I
763 (xor:SVE_FULL_I
764 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
765 (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
766 (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
767 "TARGET_SVE2"
768 "@
769 xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
770 movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
771 [(set_attr "movprfx" "*,yes")]
772 )
773
774 ;; -------------------------------------------------------------------------
775 ;; ---- [INT] Ternary logic operations
776 ;; -------------------------------------------------------------------------
777 ;; Includes:
778 ;; - BCAX
779 ;; - BSL
780 ;; - BSL1N
781 ;; - BSL2N
782 ;; - EOR3
783 ;; - NBSL
784 ;; -------------------------------------------------------------------------
785
786 ;; Unpredicated exclusive OR of AND.
;; BCAX computes operand 1 XOR (operand 2 AND operand 3); it is a
;; bitwise operation, so the .d element size is used regardless of mode.
787 (define_insn "@aarch64_sve2_bcax<mode>"
788 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
789 (xor:SVE_FULL_I
790 (and:SVE_FULL_I
791 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
792 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
793 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
794 "TARGET_SVE2"
795 "@
796 bcax\t%0.d, %0.d, %2.d, %3.d
797 movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
798 [(set_attr "movprfx" "*,yes")]
799 )
800
801 ;; Unpredicated 3-way exclusive OR.
;; EOR3 is fully commutative, so any of the three inputs may be tied to
;; the destination (alternatives 1-3); otherwise MOVPRFX copies operand 1.
802 (define_insn "@aarch64_sve2_eor3<mode>"
803 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
804 (xor:SVE_FULL_I
805 (xor:SVE_FULL_I
806 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
807 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
808 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
809 "TARGET_SVE2"
810 "@
811 eor3\t%0.d, %0.d, %2.d, %3.d
812 eor3\t%0.d, %0.d, %1.d, %3.d
813 eor3\t%0.d, %0.d, %1.d, %2.d
814 movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
815 [(set_attr "movprfx" "*,*,*,yes")]
816 )
817
818 ;; Use NBSL for vector NOR.
;; NOR(x, y) via NBSL with the select mask equal to the first input:
;; nbsl %0, %0, %2, %0 computes ~((%0 & %0) | (%2 & ~%0)) = ~(%0 | %2).
;; Operand 3 is the PRED_X predicate, canonicalized to all-true on rewrite.
819 (define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
820 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
821 (unspec:SVE_FULL_I
822 [(match_operand 3)
823 (and:SVE_FULL_I
824 (not:SVE_FULL_I
825 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
826 (not:SVE_FULL_I
827 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
828 UNSPEC_PRED_X))]
829 "TARGET_SVE2"
830 "@
831 nbsl\t%0.d, %0.d, %2.d, %0.d
832 movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d
833 "&& !CONSTANT_P (operands[3])"
834 {
835 operands[3] = CONSTM1_RTX (<VPRED>mode);
836 }
837 [(set_attr "movprfx" "*,yes")]
838 )
839
840 ;; Use NBSL for vector NAND.
;; NAND(x, y) via NBSL with the select mask equal to the second input:
;; nbsl %0, %0, %2, %2 computes ~((%0 & %2) | (%2 & ~%2)) = ~(%0 & %2).
841 (define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
842 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
843 (unspec:SVE_FULL_I
844 [(match_operand 3)
845 (ior:SVE_FULL_I
846 (not:SVE_FULL_I
847 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
848 (not:SVE_FULL_I
849 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
850 UNSPEC_PRED_X))]
851 "TARGET_SVE2"
852 "@
853 nbsl\t%0.d, %0.d, %2.d, %2.d
854 movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
855 "&& !CONSTANT_P (operands[3])"
856 {
857 operands[3] = CONSTM1_RTX (<VPRED>mode);
858 }
859 [(set_attr "movprfx" "*,yes")]
860 )
861
862 ;; Unpredicated bitwise select.
863 ;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  ;; Canonical RTL for the select: ((op1 ^ op2) & sel) ^ op2 yields
  ;; op1's bits where sel is set and op2's bits elsewhere.
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand"))
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
)

(define_insn "*aarch64_sve2_bsl<mode>"
  ;; BSL is destructive on the "mov" input.  <bsl_1st>/<bsl_2nd>,
  ;; BSL_DUP and <bsl_mov>/<bsl_dup> presumably come from iterators.md
  ;; and select which of operands 1 and 2 plays the tied role in each
  ;; instantiation -- TODO confirm against iterators.md.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	      (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)
891
892 ;; Unpredicated bitwise inverted select.
893 ;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  ;; As for BSL, but with the whole select inverted.  The NOT has to be
  ;; wrapped in an UNSPEC_PRED_X with a dummy all-true predicate
  ;; (operand 4, created in the C body).
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand"))
		 (match_operand:SVE_FULL_I 3 "register_operand"))
	       (match_dup 2)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  ;; The predicate (operand 4) does not affect the result; the rewrite
  ;; below canonicalizes it to all-true once the pattern has matched.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (match_dup BSL_DUP)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
936
937 ;; Unpredicated bitwise select with inverted first operand.
938 ;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  ;; As for BSL, but with the first (selected-when-set) operand
  ;; inverted.  Only the inner NOT needs the UNSPEC_PRED_X wrapper
  ;; with the dummy all-true predicate (operand 4).
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  ;; The predicate (operand 4) does not affect the result; the rewrite
  ;; below canonicalizes it to all-true once the pattern has matched.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
981
982 ;; Unpredicated bitwise select with inverted second operand.
983 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
  ;; Select with the not-selected operand inverted, written as an
  ;; IOR-of-ANDs.  The two NOTs are combined into a single predicated
  ;; (~op3 & ~sel) term with a dummy all-true predicate (operand 4).
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand")
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand"))
	       (not:SVE_FULL_I
		 (match_dup 3)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  ;; The predicate (operand 4) does not affect the result; the rewrite
  ;; below canonicalizes it to all-true once the pattern has matched.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
1028
1029 ;; Unpredicated bitwise select with inverted second operand, alternative form.
1030 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  ;; Identical to the pattern above except that the operands of the
  ;; inner predicated AND appear in the opposite order (~sel & ~op3
  ;; instead of ~op3 & ~sel), so that both commutations are matched.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP))
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  ;; Canonicalize the unused predicate to all-true once matched.
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
1055
1056 ;; -------------------------------------------------------------------------
1057 ;; ---- [INT] Shift-and-accumulate operations
1058 ;; -------------------------------------------------------------------------
1059 ;; Includes:
1060 ;; - SRSRA
1061 ;; - SSRA
1062 ;; - URSRA
1063 ;; - USRA
1064 ;; -------------------------------------------------------------------------
1065
1066 ;; Provide the natural unpredicated interface for SSRA and USRA.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  ;; acc + (x >> amount), with the shift wrapped in an UNSPEC_PRED_X
  ;; carrying a dummy all-true predicate (operand 4, created below).
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	  ;; The accumulator is tied to the destination in the first
	  ;; alternative; the second copies it in with MOVPRFX.
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  ;; Canonicalize the unused predicate to all-true once matched.
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
1105
1106 ;; SRSRA and URSRA.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  ;; Rounding shift-and-accumulate: acc + round_shift (x, amount).
  ;; The rounding shift is an unpredicated VRSHR_N unspec, so no dummy
  ;; predicate is needed here, unlike the SSRA/USRA patterns above.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
	    VRSHR_N)
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
1121
1122 ;; -------------------------------------------------------------------------
1123 ;; ---- [INT] Shift-and-insert operations
1124 ;; -------------------------------------------------------------------------
1125 ;; Includes:
1126 ;; - SLI
1127 ;; - SRI
1128 ;; -------------------------------------------------------------------------
1129
1130 ;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; SLI/SRI insert shifted bits of operand 2 into the existing
  ;; contents of the destination, so operand 1 (the previous value)
  ;; must be tied to operand 0 and MOVPRFX cannot be used.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	  SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)
1141
1142 ;; -------------------------------------------------------------------------
1143 ;; ---- [INT] Sum of absolute differences
1144 ;; -------------------------------------------------------------------------
1145 ;; Includes:
1146 ;; - SABA
1147 ;; - UABA
1148 ;; -------------------------------------------------------------------------
1149
1150 ;; Provide the natural unpredicated interface for SABA and UABA.
;; acc + (max (x, y) - min (x, y)), with dummy all-true predicates
;; (operand 4, created below) on the max and min.  <max_opp> pairs each
;; USMAX iterator value with the opposite (min) operation.
;;
;; Note: the original pattern carried register constraints, which are
;; ignored in a define_expand; they have been dropped for consistency
;; with the other expanders in this file.
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand")
		 (match_operand:SVE_FULL_I 3 "register_operand"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
1173
1174 ;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
1175 ;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  ;; Match acc + (max (x, y) - min (x, y)).  The two predicates
  ;; (operands 4 and 5) do not affect the result and are left
  ;; unconstrained.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_operand 5)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  ;; Accumulator: tied to the destination or copied in with
	  ;; MOVPRFX.
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1199
1200 ;; =========================================================================
1201 ;; == Extending arithmetic
1202 ;; =========================================================================
1203
1204 ;; -------------------------------------------------------------------------
1205 ;; ---- [INT] Wide binary arithmetic
1206 ;; -------------------------------------------------------------------------
1207 ;; Includes:
1208 ;; - SADDWB
1209 ;; - SADDWT
1210 ;; - SSUBWB
1211 ;; - SSUBWT
1212 ;; - UADDWB
1213 ;; - UADDWT
1214 ;; - USUBWB
1215 ;; - USUBWT
1216 ;; -------------------------------------------------------------------------
1217
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; Wide binary arithmetic: a full-width first input combined with a
  ;; narrow (half element size, <Ventype>) second input, producing a
  ;; full-width result.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)
1227
1228 ;; -------------------------------------------------------------------------
1229 ;; ---- [INT] Long binary arithmetic
1230 ;; -------------------------------------------------------------------------
1231 ;; Includes:
1232 ;; - SABDLB
1233 ;; - SABDLT
1234 ;; - SADDLB
1235 ;; - SADDLBT
1236 ;; - SADDLT
1237 ;; - SMULLB
1238 ;; - SMULLT
1239 ;; - SQDMULLB
1240 ;; - SQDMULLT
1241 ;; - SSUBLB
1242 ;; - SSUBLBT
1243 ;; - SSUBLT
1244 ;; - SSUBLTB
1245 ;; - UABDLB
1246 ;; - UABDLT
1247 ;; - UADDLB
1248 ;; - UADDLT
1249 ;; - UMULLB
1250 ;; - UMULLT
1251 ;; - USUBLB
1252 ;; - USUBLT
1253 ;; -------------------------------------------------------------------------
1254
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; Long binary arithmetic: two narrow (<Ventype>) inputs producing a
  ;; result with double-width elements.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  ;; As above, but the second input is a single lane (selected by
  ;; constant operand 3) of a register restricted to the range that
  ;; the instruction's lane encoding allows (<sve_lane_con>).
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)
1277
1278 ;; -------------------------------------------------------------------------
1279 ;; ---- [INT] Long left shifts
1280 ;; -------------------------------------------------------------------------
1281 ;; Includes:
1282 ;; - SSHLLB
1283 ;; - SSHLLT
1284 ;; - USHLLB
1285 ;; - USHLLT
1286 ;; -------------------------------------------------------------------------
1287
1288 ;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; Long left shift: widen the narrow (<Ventype>) input to
  ;; double-width elements and shift by the immediate in operand 2.
  ;; Operand 2 is only a const_int_operand because callers are expected
  ;; to have validated the range already (see comment above).
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)
1298
1299 ;; -------------------------------------------------------------------------
1300 ;; ---- [INT] Long binary arithmetic with accumulation
1301 ;; -------------------------------------------------------------------------
1302 ;; Includes:
1303 ;; - SABALB
1304 ;; - SABALT
1305 ;; - SMLALB
1306 ;; - SMLALT
1307 ;; - SMLSLB
1308 ;; - SMLSLT
1309 ;; - SQDMLALB
1310 ;; - SQDMLALBT
1311 ;; - SQDMLALT
1312 ;; - SQDMLSLB
1313 ;; - SQDMLSLBT
1314 ;; - SQDMLSLT
1315 ;; - UABALB
1316 ;; - UABALT
1317 ;; - UMLALB
1318 ;; - UMLALT
1319 ;; - UMLSLB
1320 ;; - UMLSLT
1321 ;; -------------------------------------------------------------------------
1322
1323 ;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  ;; acc + long_op (x, y): narrow inputs, full-width accumulator.
  ;; The accumulator (operand 1) is tied to the destination in the
  ;; first alternative and copied in with MOVPRFX in the second.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_ADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  ;; As above, but the second multiplicand is a single lane (constant
  ;; operand 4) of a lane-encodable register (<sve_lane_con>).
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_ADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)
1357
1358 ;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  ;; As for the non-saturating MLA patterns, but with a signed
  ;; saturating add (ss_plus) of the accumulator.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  ;; As above, with a single lane (constant operand 4) as the second
  ;; multiplicand.
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)
1392
1393 ;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  ;; acc - long_op (x, y): the accumulator is the minuend, so here it
  ;; is the first operand of the MINUS rather than the second operand
  ;; of a PLUS.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  ;; As above, with a single lane (constant operand 4) as the second
  ;; multiplicand.
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)
1427
1428 ;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  ;; As for the non-saturating MLS patterns, but with a signed
  ;; saturating subtract (ss_minus) from the accumulator.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  ;; As above, with a single lane (constant operand 4) as the second
  ;; multiplicand.
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)
1462 ;; -------------------------------------------------------------------------
1463 ;; ---- [FP] Long multiplication with accumulation
1464 ;; -------------------------------------------------------------------------
1465 ;; Includes:
1466 ;; - FMLALB
1467 ;; - FMLALT
1468 ;; - FMLSLB
1469 ;; - FMLSLT
1470 ;; -------------------------------------------------------------------------
1471
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  ;; FP long multiply-accumulate: narrow (<Ventype>) multiplicands in
  ;; operands 1 and 2, VNx4SF accumulator in operand 3.  Unlike the
  ;; integer patterns above, the accumulator is the last unspec input.
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (match_operand:<VNARROW> 2 "register_operand" "w, w")
	   (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
  ;; As above, with a single lane (constant operand 3) of a
  ;; lane-encodable register as the second multiplicand; the
  ;; accumulator moves to operand 4.
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG_LANE))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)
1502
1503 ;; =========================================================================
1504 ;; == Narrowing arithmetic
1505 ;; =========================================================================
1506
1507 ;; -------------------------------------------------------------------------
1508 ;; ---- [INT] Narrowing unary arithmetic
1509 ;; -------------------------------------------------------------------------
1510 ;; Includes:
1511 ;; - SQXTNB
1512 ;; - SQXTNT
1513 ;; - SQXTUNB
1514 ;; - SQXTUNT
1515 ;; - UQXTNB
1516 ;; - UQXTNT
1517 ;; -------------------------------------------------------------------------
1518
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Bottom" narrowing: the destination has half-size (<Ventype>)
  ;; elements, derived purely from the wide input.
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Top" narrowing: operand 1 supplies the destination's previous
  ;; contents and must therefore be tied to operand 0.
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
1538
1539 ;; -------------------------------------------------------------------------
1540 ;; ---- [INT] Narrowing binary arithmetic
1541 ;; -------------------------------------------------------------------------
1542 ;; Includes:
1543 ;; - ADDHNB
1544 ;; - ADDHNT
1545 ;; - RADDHNB
1546 ;; - RADDHNT
1547 ;; - RSUBHNB
1548 ;; - RSUBHNT
1549 ;; - SUBHNB
1550 ;; - SUBHNT
1551 ;; -------------------------------------------------------------------------
1552
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Bottom" narrowing of a binary operation on two wide inputs.
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Top" narrowing: operand 1 supplies the destination's previous
  ;; contents and must therefore be tied to operand 0.
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)
1574
1575 ;; -------------------------------------------------------------------------
1576 ;; ---- [INT] Narrowing right shifts
1577 ;; -------------------------------------------------------------------------
1578 ;; Includes:
1579 ;; - RSHRNB
1580 ;; - RSHRNT
1581 ;; - SHRNB
1582 ;; - SHRNT
1583 ;; - SQRSHRNB
1584 ;; - SQRSHRNT
1585 ;; - SQRSHRUNB
1586 ;; - SQRSHRUNT
1587 ;; - SQSHRNB
1588 ;; - SQSHRNT
1589 ;; - SQSHRUNB
1590 ;; - SQSHRUNT
1591 ;; - UQRSHRNB
1592 ;; - UQRSHRNT
1593 ;; - UQSHRNB
1594 ;; - UQSHRNT
1595 ;; -------------------------------------------------------------------------
1596
1597 ;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Bottom" narrowing right shift by an immediate.  Operand 2 is only
  ;; a const_int_operand because callers validate the range beforehand
  ;; (see comment above).
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  ;; "Top" narrowing right shift: operand 1 supplies the destination's
  ;; previous contents and must therefore be tied to operand 0.
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:DI 3 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)
1620
1621 ;; =========================================================================
1622 ;; == Pairwise arithmetic
1623 ;; =========================================================================
1624
1625 ;; -------------------------------------------------------------------------
1626 ;; ---- [INT] Pairwise arithmetic
1627 ;; -------------------------------------------------------------------------
1628 ;; Includes:
1629 ;; - ADDP
1630 ;; - SMAXP
1631 ;; - SMINP
1632 ;; - UMAXP
1633 ;; - UMINP
1634 ;; -------------------------------------------------------------------------
1635
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  ;; Predicated integer pairwise operation.  Operand 2 (the first
  ;; input) is tied to the destination in the first alternative and
  ;; copied in with MOVPRFX in the second.
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1649
1650 ;; -------------------------------------------------------------------------
1651 ;; ---- [FP] Pairwise arithmetic
1652 ;; -------------------------------------------------------------------------
1653 ;; Includes:
1654 ;; - FADDP
1655 ;; - FMAXP
1656 ;; - FMAXNMP
1657 ;; - FMINP
1658 ;; - FMINNMP
1659 ;; -------------------------------------------------------------------------
1660
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  ;; Predicated floating-point pairwise operation; same structure as
  ;; the integer pairwise pattern above.
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1674
1675 ;; -------------------------------------------------------------------------
1676 ;; ---- [INT] Pairwise arithmetic with accumulation
1677 ;; -------------------------------------------------------------------------
1678 ;; Includes:
1679 ;; - SADALP
1680 ;; - UADALP
1681 ;; -------------------------------------------------------------------------
1682
1683 ;; Predicated pairwise absolute difference and accumulate with merging.
(define_expand "@cond_<sve_int_op><mode>"
  ;; SEL (pred, adalp (pred, acc, x), fallback), where the fallback
  ;; (operand 4) is required to be either the accumulator itself or a
  ;; constant -- see the assertion below.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")
	      (match_operand:<VNARROW> 3 "register_operand")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    /* Only target code is aware of these operations, so we don't need
       to handle the fully-general case.  */
    gcc_assert (rtx_equal_p (operands[2], operands[4])
		|| CONSTANT_P (operands[4]));
  })

;; Predicated pairwise absolute difference and accumulate, merging with
;; the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  ;; Inactive lanes take their value from the accumulator (operand 2),
  ;; which is what the instruction does naturally.  The inner predicate
  ;; (operand 4) does not affect the result and is canonicalized to
  ;; all-true by the rewrite below.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated pairwise absolute difference and accumulate, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  ;; Inactive lanes must be zero, so both alternatives zero the
  ;; destination with MOVPRFX /z first; the earlyclobber keeps the
  ;; destination distinct from the narrow input.
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 5)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)
1749
1750 ;; =========================================================================
1751 ;; == Complex arithmetic
1752 ;; =========================================================================
1753
1754 ;; -------------------------------------------------------------------------
1755 ;; ---- [INT] Complex binary operations
1756 ;; -------------------------------------------------------------------------
1757 ;; Includes:
1758 ;; - CADD
1759 ;; - SQCADD
1760 ;; -------------------------------------------------------------------------
1761
1762 (define_insn "@aarch64_sve_<optab><mode>"
1763 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
1764 (unspec:SVE_FULL_I
1765 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
1766 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
1767 SVE2_INT_CADD))]
1768 "TARGET_SVE2"
1769 "@
1770 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
1771 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
1772 [(set_attr "movprfx" "*,yes")]
1773 )
1774
1775 ;; -------------------------------------------------------------------------
1776 ;; ---- [INT] Complex ternary operations
1777 ;; -------------------------------------------------------------------------
1778 ;; Includes:
1779 ;; - CMLA
1780 ;; - SQRDCMLA
1781 ;; -------------------------------------------------------------------------
1782
1783 (define_insn "@aarch64_sve_<optab><mode>"
1784 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
1785 (unspec:SVE_FULL_I
1786 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
1787 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
1788 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
1789 SVE2_INT_CMLA))]
1790 "TARGET_SVE2"
1791 "@
1792 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
1793 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
1794 [(set_attr "movprfx" "*,yes")]
1795 )
1796
1797 (define_insn "@aarch64_<optab>_lane_<mode>"
1798 [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
1799 (unspec:SVE_FULL_HSI
1800 [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
1801 (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
1802 (unspec:SVE_FULL_HSI
1803 [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
1804 (match_operand:SI 4 "const_int_operand")]
1805 UNSPEC_SVE_LANE_SELECT)]
1806 SVE2_INT_CMLA))]
1807 "TARGET_SVE2"
1808 "@
1809 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
1810 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
1811 [(set_attr "movprfx" "*,yes")]
1812 )
1813
1814 ;; -------------------------------------------------------------------------
1815 ;; ---- [INT] Complex dot product
1816 ;; -------------------------------------------------------------------------
1817 ;; Includes:
1818 ;; - CDOT
1819 ;; -------------------------------------------------------------------------
1820
1821 (define_insn "@aarch64_sve_<optab><mode>"
1822 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
1823 (unspec:SVE_FULL_SDI
1824 [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
1825 (match_operand:<VSI2QI> 2 "register_operand" "w, w")
1826 (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
1827 SVE2_INT_CDOT))]
1828 "TARGET_SVE2"
1829 "@
1830 <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
1831 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
1832 [(set_attr "movprfx" "*,yes")]
1833 )
1834
1835 (define_insn "@aarch64_<optab>_lane_<mode>"
1836 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
1837 (unspec:SVE_FULL_SDI
1838 [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
1839 (match_operand:<VSI2QI> 2 "register_operand" "w, w")
1840 (unspec:<VSI2QI>
1841 [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
1842 (match_operand:SI 4 "const_int_operand")]
1843 UNSPEC_SVE_LANE_SELECT)]
1844 SVE2_INT_CDOT))]
1845 "TARGET_SVE2"
1846 "@
1847 <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
1848 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
1849 [(set_attr "movprfx" "*,yes")]
1850 )
1851
1852 ;; =========================================================================
1853 ;; == Conversions
1854 ;; =========================================================================
1855
1856 ;; -------------------------------------------------------------------------
1857 ;; ---- [FP<-FP] Widening conversions
1858 ;; -------------------------------------------------------------------------
1859 ;; Includes:
1860 ;; - FCVTLT
1861 ;; -------------------------------------------------------------------------
1862
1863 ;; Predicated convert long top.
1864 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
1865 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
1866 (unspec:SVE_FULL_SDF
1867 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1868 (match_operand:SI 3 "aarch64_sve_gp_strictness")
1869 (match_operand:<VNARROW> 2 "register_operand" "w")]
1870 SVE2_COND_FP_UNARY_LONG))]
1871 "TARGET_SVE2"
1872 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
1873 )
1874
1875 ;; Predicated convert long top with merging.
1876 (define_expand "@cond_<sve_fp_op><mode>"
1877 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
1878 (unspec:SVE_FULL_SDF
1879 [(match_operand:<VPRED> 1 "register_operand")
1880 (unspec:SVE_FULL_SDF
1881 [(match_dup 1)
1882 (const_int SVE_STRICT_GP)
1883 (match_operand:<VNARROW> 2 "register_operand")]
1884 SVE2_COND_FP_UNARY_LONG)
1885 (match_operand:SVE_FULL_SDF 3 "register_operand")]
1886 UNSPEC_SEL))]
1887 "TARGET_SVE2"
1888 )
1889
1890 ;; These instructions do not take MOVPRFX.
1891 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
1892 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
1893 (unspec:SVE_FULL_SDF
1894 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1895 (unspec:SVE_FULL_SDF
1896 [(match_operand 4)
1897 (match_operand:SI 5 "aarch64_sve_gp_strictness")
1898 (match_operand:<VNARROW> 2 "register_operand" "w")]
1899 SVE2_COND_FP_UNARY_LONG)
1900 (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
1901 UNSPEC_SEL))]
1902 "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
1903 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
1904 "&& !rtx_equal_p (operands[1], operands[4])"
1905 {
1906 operands[4] = copy_rtx (operands[1]);
1907 }
1908 )
1909
1910 ;; -------------------------------------------------------------------------
1911 ;; ---- [FP<-FP] Narrowing conversions
1912 ;; -------------------------------------------------------------------------
1913 ;; Includes:
1914 ;; - FCVTNT
1915 ;; - FCVTX
1916 ;; - FCVTXNT
1917 ;; -------------------------------------------------------------------------
1918
1919 ;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
1920 ;; pair because the even elements always have to be supplied for active
1921 ;; elements, even if the inactive elements don't matter.
1922 ;;
1923 ;; These instructions do not take MOVPRFX.
1924 (define_insn "@aarch64_sve_cvtnt<mode>"
1925 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
1926 (unspec:SVE_FULL_HSF
1927 [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
1928 (const_int SVE_STRICT_GP)
1929 (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
1930 (match_operand:<VWIDE> 3 "register_operand" "w")]
1931 UNSPEC_COND_FCVTNT))]
1932 "TARGET_SVE2"
1933 "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
1934 )
1935
1936 ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
1937 ;; it supports MOVPRFX).
1938 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
1939 [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w")
1940 (unspec:VNx4SF_ONLY
1941 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
1942 (match_operand:SI 3 "aarch64_sve_gp_strictness")
1943 (match_operand:<VWIDE> 2 "register_operand" "w")]
1944 SVE2_COND_FP_UNARY_NARROWB))]
1945 "TARGET_SVE2"
1946 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
1947 )
1948
1949 ;; Predicated FCVTX with merging.
1950 (define_expand "@cond_<sve_fp_op><mode>"
1951 [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
1952 (unspec:VNx4SF_ONLY
1953 [(match_operand:<VWIDE_PRED> 1 "register_operand")
1954 (unspec:VNx4SF_ONLY
1955 [(match_dup 1)
1956 (const_int SVE_STRICT_GP)
1957 (match_operand:<VWIDE> 2 "register_operand")]
1958 SVE2_COND_FP_UNARY_NARROWB)
1959 (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
1960 UNSPEC_SEL))]
1961 "TARGET_SVE2"
1962 )
1963
1964 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any"
1965 [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
1966 (unspec:VNx4SF_ONLY
1967 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
1968 (unspec:VNx4SF_ONLY
1969 [(match_operand 4)
1970 (match_operand:SI 5 "aarch64_sve_gp_strictness")
1971 (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
1972 SVE2_COND_FP_UNARY_NARROWB)
1973 (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
1974 UNSPEC_SEL))]
1975 "TARGET_SVE2
1976 && !rtx_equal_p (operands[2], operands[3])
1977 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
1978 "@
1979 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
1980 movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
1981 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
1982 "&& !rtx_equal_p (operands[1], operands[4])"
1983 {
1984 operands[4] = copy_rtx (operands[1]);
1985 }
1986 [(set_attr "movprfx" "*,yes,yes")]
1987 )
1988
1989 ;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
1990 ;; pair because the even elements always have to be supplied for active
1991 ;; elements, even if the inactive elements don't matter.
1992 ;;
1993 ;; These instructions do not take MOVPRFX.
1994 (define_insn "@aarch64_sve2_cvtxnt<mode>"
1995 [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
1996 (unspec:<VNARROW>
1997 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1998 (const_int SVE_STRICT_GP)
1999 (match_operand:<VNARROW> 1 "register_operand" "0")
2000 (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
2001 UNSPEC_COND_FCVTXNT))]
2002 "TARGET_SVE2"
2003 "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
2004 )
2005
2006 ;; =========================================================================
2007 ;; == Other arithmetic
2008 ;; =========================================================================
2009
2010 ;; -------------------------------------------------------------------------
2011 ;; ---- [INT] Reciprocal approximation
2012 ;; -------------------------------------------------------------------------
2013 ;; Includes:
2014 ;; - URECPE
2015 ;; - URSQRTE
2016 ;; -------------------------------------------------------------------------
2017
2018 ;; Predicated integer unary operations.
2019 (define_insn "@aarch64_pred_<sve_int_op><mode>"
2020 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
2021 (unspec:VNx4SI_ONLY
2022 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2023 (unspec:VNx4SI_ONLY
2024 [(match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
2025 SVE2_U32_UNARY)]
2026 UNSPEC_PRED_X))]
2027 "TARGET_SVE2"
2028 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2029 )
2030
2031 ;; Predicated integer unary operations with merging.
2032 (define_expand "@cond_<sve_int_op><mode>"
2033 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
2034 (unspec:VNx4SI_ONLY
2035 [(match_operand:<VPRED> 1 "register_operand")
2036 (unspec:VNx4SI_ONLY
2037 [(match_dup 4)
2038 (unspec:VNx4SI_ONLY
2039 [(match_operand:VNx4SI_ONLY 2 "register_operand")]
2040 SVE2_U32_UNARY)]
2041 UNSPEC_PRED_X)
2042 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
2043 UNSPEC_SEL))]
2044 "TARGET_SVE2"
2045 {
2046 operands[4] = CONSTM1_RTX (<MODE>mode);
2047 }
2048 )
2049
2050 (define_insn_and_rewrite "*cond_<sve_int_op><mode>"
2051 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
2052 (unspec:VNx4SI_ONLY
2053 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2054 (unspec:VNx4SI_ONLY
2055 [(match_operand 4)
2056 (unspec:VNx4SI_ONLY
2057 [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
2058 SVE2_U32_UNARY)]
2059 UNSPEC_PRED_X)
2060 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2061 UNSPEC_SEL))]
2062 "TARGET_SVE2"
2063 "@
2064 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2065 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2066 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2067 "&& !CONSTANT_P (operands[4])"
2068 {
2069 operands[4] = CONSTM1_RTX (<VPRED>mode);
2070 }
2071 [(set_attr "movprfx" "*,yes,yes")]
2072 )
2073
2074 ;; -------------------------------------------------------------------------
2075 ;; ---- [INT<-FP] Base-2 logarithm
2076 ;; -------------------------------------------------------------------------
2077 ;; Includes:
2078 ;; - FLOGB
2079 ;; -------------------------------------------------------------------------
2080
2081 ;; Predicated FLOGB.
2082 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
2083 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
2084 (unspec:<V_INT_EQUIV>
2085 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2086 (match_operand:SI 3 "aarch64_sve_gp_strictness")
2087 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
2088 SVE2_COND_INT_UNARY_FP))]
2089 "TARGET_SVE2"
2090 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2091 )
2092
2093 ;; Predicated FLOGB with merging.
2094 (define_expand "@cond_<sve_fp_op><mode>"
2095 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2096 (unspec:<V_INT_EQUIV>
2097 [(match_operand:<VPRED> 1 "register_operand")
2098 (unspec:<V_INT_EQUIV>
2099 [(match_dup 1)
2100 (const_int SVE_STRICT_GP)
2101 (match_operand:SVE_FULL_F 2 "register_operand")]
2102 SVE2_COND_INT_UNARY_FP)
2103 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
2104 UNSPEC_SEL))]
2105 "TARGET_SVE2"
2106 )
2107
2108 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
2109 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
2110 (unspec:<V_INT_EQUIV>
2111 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2112 (unspec:<V_INT_EQUIV>
2113 [(match_operand 4)
2114 (match_operand:SI 5 "aarch64_sve_gp_strictness")
2115 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
2116 SVE2_COND_INT_UNARY_FP)
2117 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2118 UNSPEC_SEL))]
2119 "TARGET_SVE2
2120 && !rtx_equal_p (operands[2], operands[3])
2121 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
2122 "@
2123 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2124 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2125 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2126 "&& !rtx_equal_p (operands[1], operands[4])"
2127 {
2128 operands[4] = copy_rtx (operands[1]);
2129 }
2130 [(set_attr "movprfx" "*,yes,yes")]
2131 )
2132
2133 ;; -------------------------------------------------------------------------
2134 ;; ---- [INT] Polynomial multiplication
2135 ;; -------------------------------------------------------------------------
2136 ;; Includes:
2137 ;; - PMUL
2138 ;; - PMULLB
2139 ;; - PMULLT
2140 ;; -------------------------------------------------------------------------
2141
2142 ;; Uniform PMUL.
2143 (define_insn "@aarch64_sve2_pmul<mode>"
2144 [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
2145 (unspec:VNx16QI_ONLY
2146 [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
2147 (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
2148 UNSPEC_PMUL))]
2149 "TARGET_SVE2"
2150 "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2151 )
2152
2153 ;; Extending PMUL, with the results modeled as wider vectors.
2154 ;; This representation is only possible for .H and .D, not .Q.
2155 (define_insn "@aarch64_sve_<optab><mode>"
2156 [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
2157 (unspec:SVE_FULL_HDI
2158 [(match_operand:<VNARROW> 1 "register_operand" "w")
2159 (match_operand:<VNARROW> 2 "register_operand" "w")]
2160 SVE2_PMULL))]
2161 "TARGET_SVE2"
2162 "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
2163 )
2164
2165 ;; Extending PMUL, with the results modeled as pairs of values.
2166 ;; This representation works for .H, .D and .Q, with .Q requiring
2167 ;; the AES extension. (This is enforced by the mode iterator.)
2168 (define_insn "@aarch64_sve_<optab><mode>"
2169 [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
2170 (unspec:SVE2_PMULL_PAIR_I
2171 [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
2172 (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
2173 SVE2_PMULL_PAIR))]
2174 "TARGET_SVE2"
2175 "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
2176 )
2177
2178 ;; =========================================================================
2179 ;; == Permutation
2180 ;; =========================================================================
2181
2182 ;; -------------------------------------------------------------------------
2183 ;; ---- [INT,FP] General permutes
2184 ;; -------------------------------------------------------------------------
2185 ;; Includes:
2186 ;; - TBL (vector pair form)
2187 ;; - TBX
2188 ;; -------------------------------------------------------------------------
2189
2190 ;; TBL on a pair of data vectors.
2191 (define_insn "@aarch64_sve2_tbl2<mode>"
2192 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2193 (unspec:SVE_FULL
2194 [(match_operand:<VDOUBLE> 1 "register_operand" "w")
2195 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
2196 UNSPEC_TBL2))]
2197 "TARGET_SVE2"
2198 "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
2199 )
2200
2201 ;; TBX. These instructions do not take MOVPRFX.
2202 (define_insn "@aarch64_sve2_tbx<mode>"
2203 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2204 (unspec:SVE_FULL
2205 [(match_operand:SVE_FULL 1 "register_operand" "0")
2206 (match_operand:SVE_FULL 2 "register_operand" "w")
2207 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
2208 UNSPEC_TBX))]
2209 "TARGET_SVE2"
2210 "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
2211 )
2212
2213 ;; -------------------------------------------------------------------------
2214 ;; ---- [INT] Optional bit-permute extensions
2215 ;; -------------------------------------------------------------------------
2216 ;; Includes:
2217 ;; - BDEP
2218 ;; - BEXT
2219 ;; - BGRP
2220 ;; -------------------------------------------------------------------------
2221
2222 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2223 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2224 (unspec:SVE_FULL_I
2225 [(match_operand:SVE_FULL_I 1 "register_operand" "w")
2226 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
2227 SVE2_INT_BITPERM))]
2228 "TARGET_SVE2_BITPERM"
2229 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2230 )
2231
2232 ;; =========================================================================
2233 ;; == General
2234 ;; =========================================================================
2235
2236 ;; -------------------------------------------------------------------------
2237 ;; ---- Check for aliases between pointers
2238 ;; -------------------------------------------------------------------------
2239 ;; The patterns in this section are synthetic: WHILERW and WHILEWR are
2240 ;; defined in aarch64-sve.md instead.
2241 ;; -------------------------------------------------------------------------
2242
2243 ;; Use WHILERW and WHILEWR to accelerate alias checks. This is only
2244 ;; possible if the accesses we're checking are exactly the same size
2245 ;; as an SVE vector.
2246 (define_expand "check_<raw_war>_ptrs<mode>"
2247 [(match_operand:GPI 0 "register_operand")
2248 (unspec:VNx16BI
2249 [(match_operand:GPI 1 "register_operand")
2250 (match_operand:GPI 2 "register_operand")
2251 (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
2252 (match_operand:GPI 4 "const_int_operand")]
2253 SVE2_WHILE_PTR)]
2254 "TARGET_SVE2"
2255 {
2256 /* Use the widest predicate mode we can. */
2257 unsigned int align = INTVAL (operands[4]);
2258 if (align > 8)
2259 align = 8;
2260 machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
2261
2262 /* Emit a WHILERW or WHILEWR, setting the condition codes based on
2263 the result. */
2264 emit_insn (gen_while_ptest
2265 (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
2266 gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
2267 CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
2268
2269 /* Set operand 0 to true if the last bit of the predicate result is set,
2270 i.e. if all elements are free of dependencies. */
2271 rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
2272 rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
2273 emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
2274 DONE;
2275 })
2276
2277 ;; -------------------------------------------------------------------------
2278 ;; ---- Histogram processing
2279 ;; -------------------------------------------------------------------------
2280 ;; Includes:
2281 ;; - HISTCNT
2282 ;; - HISTSEG
2283 ;; -------------------------------------------------------------------------
2284
2285 (define_insn "@aarch64_sve2_histcnt<mode>"
2286 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
2287 (unspec:SVE_FULL_SDI
2288 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2289 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
2290 (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
2291 UNSPEC_HISTCNT))]
2292 "TARGET_SVE2"
2293 "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
2294 )
2295
2296 (define_insn "@aarch64_sve2_histseg<mode>"
2297 [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
2298 (unspec:VNx16QI_ONLY
2299 [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
2300 (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
2301 UNSPEC_HISTSEG))]
2302 "TARGET_SVE2"
2303 "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2304 )
2305
2306 ;; -------------------------------------------------------------------------
2307 ;; ---- String matching
2308 ;; -------------------------------------------------------------------------
2309 ;; Includes:
2310 ;; - MATCH
2311 ;; - NMATCH
2312 ;; -------------------------------------------------------------------------
2313
2314 ;; Predicated string matching.
2315 (define_insn "@aarch64_pred_<sve_int_op><mode>"
2316 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
2317 (unspec:<VPRED>
2318 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2319 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
2320 (unspec:<VPRED>
2321 [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
2322 (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
2323 SVE2_MATCH)]
2324 UNSPEC_PRED_Z))
2325 (clobber (reg:CC_NZC CC_REGNUM))]
2326 "TARGET_SVE2"
2327 "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
2328 )
2329
2330 ;; Predicated string matching in which both the flag and predicate results
2331 ;; are interesting.
2332 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
2333 [(set (reg:CC_NZC CC_REGNUM)
2334 (unspec:CC_NZC
2335 [(match_operand:VNx16BI 1 "register_operand" "Upl")
2336 (match_operand 4)
2337 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
2338 (unspec:<VPRED>
2339 [(match_operand 6)
2340 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
2341 (unspec:<VPRED>
2342 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
2343 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
2344 SVE2_MATCH)]
2345 UNSPEC_PRED_Z)]
2346 UNSPEC_PTEST))
2347 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
2348 (unspec:<VPRED>
2349 [(match_dup 6)
2350 (match_dup 7)
2351 (unspec:<VPRED>
2352 [(match_dup 2)
2353 (match_dup 3)]
2354 SVE2_MATCH)]
2355 UNSPEC_PRED_Z))]
2356 "TARGET_SVE2
2357 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
2358 "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
2359 "&& !rtx_equal_p (operands[4], operands[6])"
2360 {
2361 operands[6] = copy_rtx (operands[4]);
2362 operands[7] = operands[5];
2363 }
2364 )
2365
2366 ;; Predicated string matching in which only the flags result is interesting.
2367 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
2368 [(set (reg:CC_NZC CC_REGNUM)
2369 (unspec:CC_NZC
2370 [(match_operand:VNx16BI 1 "register_operand" "Upl")
2371 (match_operand 4)
2372 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
2373 (unspec:<VPRED>
2374 [(match_operand 6)
2375 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
2376 (unspec:<VPRED>
2377 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
2378 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
2379 SVE2_MATCH)]
2380 UNSPEC_PRED_Z)]
2381 UNSPEC_PTEST))
2382 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
2383 "TARGET_SVE2
2384 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
2385 "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
2386 "&& !rtx_equal_p (operands[4], operands[6])"
2387 {
2388 operands[6] = copy_rtx (operands[4]);
2389 operands[7] = operands[5];
2390 }
2391 )
2392
2393 ;; =========================================================================
2394 ;; == Crypotographic extensions
2395 ;; =========================================================================
2396
2397 ;; -------------------------------------------------------------------------
2398 ;; ---- Optional AES extensions
2399 ;; -------------------------------------------------------------------------
2400 ;; Includes:
2401 ;; - AESD
2402 ;; - AESE
2403 ;; - AESIMC
2404 ;; - AESMC
2405 ;; -------------------------------------------------------------------------
2406
2407 ;; AESD and AESE.
2408 (define_insn "aarch64_sve2_aes<aes_op>"
2409 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
2410 (unspec:VNx16QI
2411 [(xor:VNx16QI
2412 (match_operand:VNx16QI 1 "register_operand" "%0")
2413 (match_operand:VNx16QI 2 "register_operand" "w"))]
2414 CRYPTO_AES))]
2415 "TARGET_SVE2_AES"
2416 "aes<aes_op>\t%0.b, %0.b, %2.b"
2417 [(set_attr "type" "crypto_aese")]
2418 )
2419
2420 ;; AESMC and AESIMC. These instructions do not take MOVPRFX.
2421 (define_insn "aarch64_sve2_aes<aesmc_op>"
2422 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
2423 (unspec:VNx16QI
2424 [(match_operand:VNx16QI 1 "register_operand" "0")]
2425 CRYPTO_AESMC))]
2426 "TARGET_SVE2_AES"
2427 "aes<aesmc_op>\t%0.b, %0.b"
2428 [(set_attr "type" "crypto_aesmc")]
2429 )
2430
2431 ;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
2432 ;; to keep the two together and enforce the register dependency without
2433 ;; scheduling or register allocation messing up the order or introducing
2434 ;; moves inbetween. Mash the two together during combine.
2435
2436 (define_insn "*aarch64_sve2_aese_fused"
2437 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
2438 (unspec:VNx16QI
2439 [(unspec:VNx16QI
2440 [(xor:VNx16QI
2441 (match_operand:VNx16QI 1 "register_operand" "%0")
2442 (match_operand:VNx16QI 2 "register_operand" "w"))]
2443 UNSPEC_AESE)]
2444 UNSPEC_AESMC))]
2445 "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
2446 "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
2447 [(set_attr "type" "crypto_aese")
2448 (set_attr "length" "8")]
2449 )
2450
2451 (define_insn "*aarch64_sve2_aesd_fused"
2452 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
2453 (unspec:VNx16QI
2454 [(unspec:VNx16QI
2455 [(xor:VNx16QI
2456 (match_operand:VNx16QI 1 "register_operand" "%0")
2457 (match_operand:VNx16QI 2 "register_operand" "w"))]
2458 UNSPEC_AESD)]
2459 UNSPEC_AESIMC))]
2460 "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
2461 "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
2462 [(set_attr "type" "crypto_aese")
2463 (set_attr "length" "8")]
2464 )
2465
2466 ;; -------------------------------------------------------------------------
2467 ;; ---- Optional SHA-3 extensions
2468 ;; -------------------------------------------------------------------------
2469 ;; Includes:
2470 ;; - RAX1
2471 ;; -------------------------------------------------------------------------
2472
2473 (define_insn "aarch64_sve2_rax1"
2474 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
2475 (xor:VNx2DI
2476 (rotate:VNx2DI
2477 (match_operand:VNx2DI 2 "register_operand" "w")
2478 (const_int 1))
2479 (match_operand:VNx2DI 1 "register_operand" "w")))]
2480 "TARGET_SVE2_SHA3"
2481 "rax1\t%0.d, %1.d, %2.d"
2482 [(set_attr "type" "crypto_sha3")]
2483 )
2484
2485 ;; -------------------------------------------------------------------------
2486 ;; ---- Optional SM4 extensions
2487 ;; -------------------------------------------------------------------------
2488 ;; Includes:
2489 ;; - SM4E
2490 ;; - SM4EKEY
2491 ;; -------------------------------------------------------------------------
2492
2493 ;; These instructions do not take MOVPRFX.
2494 (define_insn "aarch64_sve2_sm4e"
2495 [(set (match_operand:VNx4SI 0 "register_operand" "=w")
2496 (unspec:VNx4SI
2497 [(match_operand:VNx4SI 1 "register_operand" "0")
2498 (match_operand:VNx4SI 2 "register_operand" "w")]
2499 UNSPEC_SM4E))]
2500 "TARGET_SVE2_SM4"
2501 "sm4e\t%0.s, %0.s, %2.s"
2502 [(set_attr "type" "crypto_sm4")]
2503 )
2504
2505 (define_insn "aarch64_sve2_sm4ekey"
2506 [(set (match_operand:VNx4SI 0 "register_operand" "=w")
2507 (unspec:VNx4SI
2508 [(match_operand:VNx4SI 1 "register_operand" "w")
2509 (match_operand:VNx4SI 2 "register_operand" "w")]
2510 UNSPEC_SM4EKEY))]
2511 "TARGET_SVE2_SM4"
2512 "sm4ekey\t%0.s, %1.s, %2.s"
2513 [(set_attr "type" "crypto_sm4")]
2514 )