Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/aarch64/aarch64-sve2.md @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 ;; Machine description for AArch64 SVE2. | |
2 ;; Copyright (C) 2019-2020 Free Software Foundation, Inc. | |
3 ;; Contributed by ARM Ltd. | |
4 ;; | |
5 ;; This file is part of GCC. | |
6 ;; | |
7 ;; GCC is free software; you can redistribute it and/or modify it | |
8 ;; under the terms of the GNU General Public License as published by | |
9 ;; the Free Software Foundation; either version 3, or (at your option) | |
10 ;; any later version. | |
11 ;; | |
12 ;; GCC is distributed in the hope that it will be useful, but | |
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 ;; General Public License for more details. | |
16 ;; | |
17 ;; You should have received a copy of the GNU General Public License | |
18 ;; along with GCC; see the file COPYING3. If not see | |
19 ;; <http://www.gnu.org/licenses/>. | |
20 | |
21 ;; The file is organised into the following sections (search for the full | |
22 ;; line): | |
23 ;; | |
24 ;; == Moves | |
25 ;; ---- Non-temporal gather loads | |
26 ;; ---- Non-temporal scatter stores | |
27 ;; | |
28 ;; == Uniform binary arithmetic | |
29 ;; ---- [INT] Multiplication | |
30 ;; ---- [INT] Scaled high-part multiplication | |
31 ;; ---- [INT] General binary arithmetic that maps to unspecs | |
32 ;; ---- [INT] Saturating binary arithmetic | |
33 ;; ---- [INT] Saturating left shifts | |
34 ;; | |
35 ;; == Uniform ternary arithmetic | |
36 ;; ---- [INT] General ternary arithmetic that maps to unspecs | |
37 ;; ---- [INT] Multiply-and-accumulate operations | |
38 ;; ---- [INT] Binary logic operations with rotation | |
39 ;; ---- [INT] Ternary logic operations | |
40 ;; ---- [INT] Shift-and-accumulate operations | |
41 ;; ---- [INT] Shift-and-insert operations | |
42 ;; ---- [INT] Sum of absolute differences | |
43 ;; | |
44 ;; == Extending arithmetic | |
45 ;; ---- [INT] Wide binary arithmetic | |
46 ;; ---- [INT] Long binary arithmetic | |
47 ;; ---- [INT] Long left shifts | |
48 ;; ---- [INT] Long binary arithmetic with accumulation | |
49 ;; ---- [FP] Long multiplication with accumulation | |
50 ;; | |
51 ;; == Narrowing arithmetic | |
52 ;; ---- [INT] Narrowing unary arithmetic | |
53 ;; ---- [INT] Narrowing binary arithmetic | |
54 ;; ---- [INT] Narrowing right shifts | |
55 ;; | |
56 ;; == Pairwise arithmetic | |
57 ;; ---- [INT] Pairwise arithmetic | |
58 ;; ---- [FP] Pairwise arithmetic | |
59 ;; ---- [INT] Pairwise arithmetic with accumulation | |
60 ;; | |
61 ;; == Complex arithmetic | |
62 ;; ---- [INT] Complex binary operations | |
63 ;; ---- [INT] Complex ternary operations | |
64 ;; ---- [INT] Complex dot product | |
65 ;; | |
66 ;; == Conversions | |
67 ;; ---- [FP<-FP] Widening conversions | |
68 ;; ---- [FP<-FP] Narrowing conversions | |
69 ;; | |
70 ;; == Other arithmetic | |
71 ;; ---- [INT] Reciprocal approximation | |
72 ;; ---- [INT<-FP] Base-2 logarithm | |
73 ;; ---- [INT] Polynomial multiplication | |
74 ;; | |
75 ;; == Permutation | |
76 ;; ---- [INT,FP] General permutes | |
77 ;; ---- [INT] Optional bit-permute extensions | |
78 ;; | |
79 ;; == General | |
80 ;; ---- Check for aliases between pointers | |
81 ;; ---- Histogram processing | |
82 ;; ---- String matching | |
83 ;; | |
84 ;; == Cryptographic extensions | |
85 ;; ---- Optional AES extensions | |
86 ;; ---- Optional SHA-3 extensions | |
87 ;; ---- Optional SM4 extensions | |
88 | |
89 ;; ========================================================================= | |
90 ;; == Moves | |
91 ;; ========================================================================= | |
92 | |
93 ;; ------------------------------------------------------------------------- | |
94 ;; ---- Non-temporal gather loads | |
95 ;; ------------------------------------------------------------------------- | |
96 ;; Includes gather forms of: | |
97 ;; - LDNT1B | |
98 ;; - LDNT1D | |
99 ;; - LDNT1H | |
100 ;; - LDNT1W | |
101 ;; ------------------------------------------------------------------------- | |
102 | |
103 ;; Non-extending loads. | |
104 (define_insn "@aarch64_gather_ldnt<mode>" | |
105 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w") | |
106 (unspec:SVE_FULL_SD | |
107 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
108 (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") | |
109 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w") | |
110 (mem:BLK (scratch))] | |
111 UNSPEC_LDNT1_GATHER))] | |
112 "TARGET_SVE2" | |
113 "@ | |
114 ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>] | |
115 ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]" | |
116 ) | |
117 | |
118 ;; Extending loads. | |
119 (define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>" | |
120 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w") | |
121 (unspec:SVE_FULL_SDI | |
122 [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm") | |
123 (ANY_EXTEND:SVE_FULL_SDI | |
124 (unspec:SVE_PARTIAL_I | |
125 [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl") | |
126 (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") | |
127 (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w") | |
128 (mem:BLK (scratch))] | |
129 UNSPEC_LDNT1_GATHER))] | |
130 UNSPEC_PRED_X))] | |
131 "TARGET_SVE2 | |
132 && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" | |
133 "@ | |
134 ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>] | |
135 ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]" | |
136 "&& !CONSTANT_P (operands[4])" | |
137 { | |
138 operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode); | |
139 } | |
140 ) | |
141 | |
142 ;; ------------------------------------------------------------------------- | |
143 ;; ---- Non-temporal scatter stores | |
144 ;; ------------------------------------------------------------------------- | |
145 ;; Includes scatter forms of: | |
146 ;; - STNT1B | |
147 ;; - STNT1D | |
148 ;; - STNT1H | |
149 ;; - STNT1W | |
150 ;; ------------------------------------------------------------------------- | |
151 | |
152 ;; Non-truncating stores. | |
153 (define_insn "@aarch64_scatter_stnt<mode>" | |
154 [(set (mem:BLK (scratch)) | |
155 (unspec:BLK | |
156 [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl") | |
157 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") | |
158 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w") | |
159 (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")] | |
160 | |
161 UNSPEC_STNT1_SCATTER))] | |
162 "TARGET_SVE" | |
163 "@ | |
164 stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>] | |
165 stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]" | |
166 ) | |
167 | |
168 ;; Truncating stores. | |
169 (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>" | |
170 [(set (mem:BLK (scratch)) | |
171 (unspec:BLK | |
172 [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl") | |
173 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") | |
174 (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w") | |
175 (truncate:SVE_PARTIAL_I | |
176 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))] | |
177 UNSPEC_STNT1_SCATTER))] | |
178 "TARGET_SVE2 | |
179 && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" | |
180 "@ | |
181 stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>] | |
182 stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]" | |
183 ) | |
184 | |
185 ;; ========================================================================= | |
186 ;; == Uniform binary arithmetic | |
187 ;; ========================================================================= | |
188 | |
189 ;; ------------------------------------------------------------------------- | |
190 ;; ---- [INT] Multiplication | |
191 ;; ------------------------------------------------------------------------- | |
192 ;; Includes the lane forms of: | |
193 ;; - MUL | |
194 ;; ------------------------------------------------------------------------- | |
195 | |
196 (define_insn "@aarch64_mul_lane_<mode>" | |
197 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") | |
198 (mult:SVE_FULL_HSDI | |
199 (unspec:SVE_FULL_HSDI | |
200 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>") | |
201 (match_operand:SI 3 "const_int_operand")] | |
202 UNSPEC_SVE_LANE_SELECT) | |
203 (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))] | |
204 "TARGET_SVE2" | |
205 "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" | |
206 ) | |
207 | |
208 ;; ------------------------------------------------------------------------- | |
209 ;; ---- [INT] Scaled high-part multiplication | |
210 ;; ------------------------------------------------------------------------- | |
211 ;; The patterns in this section are synthetic. | |
212 ;; ------------------------------------------------------------------------- | |
213 | |
214 ;; Unpredicated integer multiply-high-with-(round-and-)scale. | |
215 (define_expand "<su>mulh<r>s<mode>3" | |
216 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") | |
217 (unspec:SVE_FULL_BHSI | |
218 [(match_dup 3) | |
219 (unspec:SVE_FULL_BHSI | |
220 [(match_operand:SVE_FULL_BHSI 1 "register_operand") | |
221 (match_operand:SVE_FULL_BHSI 2 "register_operand")] | |
222 MULHRS)] | |
223 UNSPEC_PRED_X))] | |
224 "TARGET_SVE2" | |
225 { | |
226 operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
227 | |
228 rtx prod_b = gen_reg_rtx (<VWIDE>mode); | |
229 rtx prod_t = gen_reg_rtx (<VWIDE>mode); | |
230 emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1], | |
231 operands[2])); | |
232 emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1], | |
233 operands[2])); | |
234 | |
235 rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1); | |
236 emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift)); | |
237 emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0], | |
238 prod_t, shift)); | |
239 | |
240 DONE; | |
241 } | |
242 ) | |
243 | |
244 ;; ------------------------------------------------------------------------- | |
245 ;; ---- [INT] General binary arithmetic that maps to unspecs | |
246 ;; ------------------------------------------------------------------------- | |
247 ;; Includes: | |
248 ;; - SHADD | |
249 ;; - SHSUB | |
250 ;; - SHSUBR | |
251 ;; - SQRSHL | |
252 ;; - SQRSHLR | |
253 ;; - SRHADD | |
254 ;; - SRSHL | |
255 ;; - SRSHLR | |
256 ;; - SUQADD | |
257 ;; - UHADD | |
258 ;; - UHSUB | |
259 ;; - UHSUBR | |
260 ;; - UQRSHL | |
261 ;; - UQRSHLR | |
262 ;; - URHADD | |
263 ;; - URSHL | |
264 ;; - URSHLR | |
265 ;; - USQADD | |
266 ;; ------------------------------------------------------------------------- | |
267 | |
268 ;; Integer average (floor). | |
269 (define_expand "<u>avg<mode>3_floor" | |
270 [(set (match_operand:SVE_FULL_I 0 "register_operand") | |
271 (unspec:SVE_FULL_I | |
272 [(match_dup 3) | |
273 (unspec:SVE_FULL_I | |
274 [(match_operand:SVE_FULL_I 1 "register_operand") | |
275 (match_operand:SVE_FULL_I 2 "register_operand")] | |
276 HADD)] | |
277 UNSPEC_PRED_X))] | |
278 "TARGET_SVE2" | |
279 { | |
280 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
281 } | |
282 ) | |
283 | |
284 ;; Integer average (rounding). | |
285 (define_expand "<u>avg<mode>3_ceil" | |
286 [(set (match_operand:SVE_FULL_I 0 "register_operand") | |
287 (unspec:SVE_FULL_I | |
288 [(match_dup 3) | |
289 (unspec:SVE_FULL_I | |
290 [(match_operand:SVE_FULL_I 1 "register_operand") | |
291 (match_operand:SVE_FULL_I 2 "register_operand")] | |
292 RHADD)] | |
293 UNSPEC_PRED_X))] | |
294 "TARGET_SVE2" | |
295 { | |
296 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
297 } | |
298 ) | |
299 | |
300 ;; The immediate form of SQADD acts as an immediate form of SUQADD | |
301 ;; over its full range. In contrast to the ss_plus pattern, we do | |
302 ;; not need to treat byte immediates specially. E.g.: | |
303 ;; | |
304 ;; SQADD Z0.B, Z0.B, #128 | |
305 ;; | |
306 ;; is equivalent to: | |
307 ;; | |
308 ;; MOV Z1.B, #128 | |
309 ;; SUQADD Z0.B, P0/M, Z0.B, Z1.B | |
310 ;; | |
311 ;; even though it's not equivalent to: | |
312 ;; | |
313 ;; MOV Z1.B, #128 | |
314 ;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128 | |
315 (define_insn "@aarch64_sve_suqadd<mode>_const" | |
316 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") | |
317 (unspec:SVE_FULL_I | |
318 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") | |
319 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")] | |
320 UNSPEC_SUQADD))] | |
321 "TARGET_SVE2" | |
322 "@ | |
323 sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
324 movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2" | |
325 [(set_attr "movprfx" "*,yes")] | |
326 ) | |
327 | |
328 ;; General predicated binary arithmetic. All operations handled here | |
329 ;; are commutative or have a reversed form. | |
330 (define_insn "@aarch64_pred_<sve_int_op><mode>" | |
331 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") | |
332 (unspec:SVE_FULL_I | |
333 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
334 (unspec:SVE_FULL_I | |
335 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w") | |
336 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")] | |
337 SVE2_COND_INT_BINARY_REV)] | |
338 UNSPEC_PRED_X))] | |
339 "TARGET_SVE2" | |
340 "@ | |
341 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
342 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
343 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
344 [(set_attr "movprfx" "*,*,yes")] | |
345 ) | |
346 | |
347 ;; Predicated binary arithmetic with merging. | |
348 (define_expand "@cond_<sve_int_op><mode>" | |
349 [(set (match_operand:SVE_FULL_I 0 "register_operand") | |
350 (unspec:SVE_FULL_I | |
351 [(match_operand:<VPRED> 1 "register_operand") | |
352 (unspec:SVE_FULL_I | |
353 [(match_dup 5) | |
354 (unspec:SVE_FULL_I | |
355 [(match_operand:SVE_FULL_I 2 "register_operand") | |
356 (match_operand:SVE_FULL_I 3 "register_operand")] | |
357 SVE2_COND_INT_BINARY)] | |
358 UNSPEC_PRED_X) | |
359 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] | |
360 UNSPEC_SEL))] | |
361 "TARGET_SVE2" | |
362 { | |
363 operands[5] = CONSTM1_RTX (<MODE>mode); | |
364 } | |
365 ) | |
366 | |
367 ;; Predicated binary arithmetic, merging with the first input. | |
368 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2" | |
369 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") | |
370 (unspec:SVE_FULL_I | |
371 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
372 (unspec:SVE_FULL_I | |
373 [(match_operand 4) | |
374 (unspec:SVE_FULL_I | |
375 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") | |
376 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] | |
377 SVE2_COND_INT_BINARY)] | |
378 UNSPEC_PRED_X) | |
379 (match_dup 2)] | |
380 UNSPEC_SEL))] | |
381 "TARGET_SVE2" | |
382 "@ | |
383 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
384 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
385 "&& !CONSTANT_P (operands[4])" | |
386 { | |
387 operands[4] = CONSTM1_RTX (<VPRED>mode); | |
388 } | |
389 [(set_attr "movprfx" "*,yes")] | |
390 ) | |
391 | |
392 ;; Predicated binary arithmetic, merging with the second input. | |
393 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3" | |
394 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") | |
395 (unspec:SVE_FULL_I | |
396 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
397 (unspec:SVE_FULL_I | |
398 [(match_operand 4) | |
399 (unspec:SVE_FULL_I | |
400 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") | |
401 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] | |
402 SVE2_COND_INT_BINARY_REV)] | |
403 UNSPEC_PRED_X) | |
404 (match_dup 3)] | |
405 UNSPEC_SEL))] | |
406 "TARGET_SVE2" | |
407 "@ | |
408 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
409 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
410 "&& !CONSTANT_P (operands[4])" | |
411 { | |
412 operands[4] = CONSTM1_RTX (<VPRED>mode); | |
413 } | |
414 [(set_attr "movprfx" "*,yes")] | |
415 ) | |
416 | |
417 ;; Predicated binary operations, merging with an independent value. | |
418 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any" | |
419 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
420 (unspec:SVE_FULL_I | |
421 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
422 (unspec:SVE_FULL_I | |
423 [(match_operand 5) | |
424 (unspec:SVE_FULL_I | |
425 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") | |
426 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")] | |
427 SVE2_COND_INT_BINARY_REV)] | |
428 UNSPEC_PRED_X) | |
429 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
430 UNSPEC_SEL))] | |
431 "TARGET_SVE2 | |
432 && !rtx_equal_p (operands[2], operands[4]) | |
433 && !rtx_equal_p (operands[3], operands[4])" | |
434 "@ | |
435 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
436 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
437 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
438 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
439 #" | |
440 "&& 1" | |
441 { | |
442 if (reload_completed | |
443 && register_operand (operands[4], <MODE>mode) | |
444 && !rtx_equal_p (operands[0], operands[4])) | |
445 { | |
446 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
447 operands[4], operands[1])); | |
448 operands[4] = operands[2] = operands[0]; | |
449 } | |
450 else if (!CONSTANT_P (operands[5])) | |
451 operands[5] = CONSTM1_RTX (<VPRED>mode); | |
452 else | |
453 FAIL; | |
454 } | |
455 [(set_attr "movprfx" "yes")] | |
456 ) | |
457 | |
458 ;; Predicated binary operations with no reverse form, merging with zero. | |
459 ;; At present we don't generate these patterns via a cond_* optab, | |
460 ;; so there's no correctness requirement to handle merging with an | |
461 ;; independent value. | |
462 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_z" | |
463 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") | |
464 (unspec:SVE_FULL_I | |
465 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
466 (unspec:SVE_FULL_I | |
467 [(match_operand 5) | |
468 (unspec:SVE_FULL_I | |
469 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") | |
470 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] | |
471 SVE2_COND_INT_BINARY_NOREV)] | |
472 UNSPEC_PRED_X) | |
473 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] | |
474 UNSPEC_SEL))] | |
475 "TARGET_SVE2" | |
476 "@ | |
477 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
478 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
479 "&& !CONSTANT_P (operands[5])" | |
480 { | |
481 operands[5] = CONSTM1_RTX (<VPRED>mode); | |
482 } | |
483 [(set_attr "movprfx" "yes")] | |
484 ) | |
485 | |
486 ;; ------------------------------------------------------------------------- | |
487 ;; ---- [INT] Saturating binary arithmetic | |
488 ;; ------------------------------------------------------------------------- | |
489 ;; Includes: | |
490 ;; - SQDMULH | |
491 ;; - SQRDMULH | |
492 ;; ------------------------------------------------------------------------- | |
493 | |
494 (define_insn "@aarch64_sve_<sve_int_op><mode>" | |
495 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") | |
496 (unspec:SVE_FULL_I | |
497 [(match_operand:SVE_FULL_I 1 "register_operand" "w") | |
498 (match_operand:SVE_FULL_I 2 "register_operand" "w")] | |
499 SVE2_INT_BINARY))] | |
500 "TARGET_SVE2" | |
501 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
502 ) | |
503 | |
504 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>" | |
505 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") | |
506 (unspec:SVE_FULL_HSDI | |
507 [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") | |
508 (unspec:SVE_FULL_HSDI | |
509 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>") | |
510 (match_operand:SI 3 "const_int_operand")] | |
511 UNSPEC_SVE_LANE_SELECT)] | |
512 SVE2_INT_BINARY_LANE))] | |
513 "TARGET_SVE2" | |
514 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" | |
515 ) | |
516 | |
517 ;; ------------------------------------------------------------------------- | |
518 ;; ---- [INT] Saturating left shifts | |
519 ;; ------------------------------------------------------------------------- | |
520 ;; Includes: | |
521 ;; - SQSHL | |
522 ;; - SQSHLR | |
523 ;; - UQSHL | |
524 ;; - UQSHLR | |
525 ;; ------------------------------------------------------------------------- | |
526 | |
527 ;; Predicated left shifts. | |
528 (define_insn "@aarch64_pred_<sve_int_op><mode>" | |
529 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w") | |
530 (unspec:SVE_FULL_I | |
531 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
532 (unspec:SVE_FULL_I | |
533 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w") | |
534 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")] | |
535 SVE2_COND_INT_SHIFT)] | |
536 UNSPEC_PRED_X))] | |
537 "TARGET_SVE2" | |
538 "@ | |
539 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
540 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
541 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
542 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
543 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
544 [(set_attr "movprfx" "*,*,*,yes,yes")] | |
545 ) | |
546 | |
547 ;; Predicated left shifts with merging. | |
548 (define_expand "@cond_<sve_int_op><mode>" | |
549 [(set (match_operand:SVE_FULL_I 0 "register_operand") | |
550 (unspec:SVE_FULL_I | |
551 [(match_operand:<VPRED> 1 "register_operand") | |
552 (unspec:SVE_FULL_I | |
553 [(match_dup 5) | |
554 (unspec:SVE_FULL_I | |
555 [(match_operand:SVE_FULL_I 2 "register_operand") | |
556 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")] | |
557 SVE2_COND_INT_SHIFT)] | |
558 UNSPEC_PRED_X) | |
559 (match_operand:SVE_FULL_I 4 "register_operand")] | |
560 UNSPEC_SEL))] | |
561 "TARGET_SVE2" | |
562 { | |
563 operands[5] = CONSTM1_RTX (<VPRED>mode); | |
564 } | |
565 ) | |
566 | |
567 ;; Predicated left shifts, merging with the first input. | |
568 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2" | |
569 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") | |
570 (unspec:SVE_FULL_I | |
571 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
572 (unspec:SVE_FULL_I | |
573 [(match_operand 4) | |
574 (unspec:SVE_FULL_I | |
575 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w") | |
576 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")] | |
577 SVE2_COND_INT_SHIFT)] | |
578 UNSPEC_PRED_X) | |
579 (match_dup 2)] | |
580 UNSPEC_SEL))] | |
581 "TARGET_SVE2" | |
582 "@ | |
583 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
584 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
585 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
586 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
587 "&& !CONSTANT_P (operands[4])" | |
588 { | |
589 operands[4] = CONSTM1_RTX (<VPRED>mode); | |
590 } | |
591 [(set_attr "movprfx" "*,*,yes,yes")] | |
592 ) | |
593 | |
594 ;; Predicated left shifts, merging with the second input. | |
595 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3" | |
596 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") | |
597 (unspec:SVE_FULL_I | |
598 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
599 (unspec:SVE_FULL_I | |
600 [(match_operand 4) | |
601 (unspec:SVE_FULL_I | |
602 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") | |
603 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] | |
604 SVE2_COND_INT_SHIFT)] | |
605 UNSPEC_PRED_X) | |
606 (match_dup 3)] | |
607 UNSPEC_SEL))] | |
608 "TARGET_SVE2" | |
609 "@ | |
610 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
611 movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
612 "&& !CONSTANT_P (operands[4])" | |
613 { | |
614 operands[4] = CONSTM1_RTX (<VPRED>mode); | |
615 } | |
616 [(set_attr "movprfx" "*,yes")] | |
617 ) | |
618 | |
619 ;; Predicated left shifts, merging with an independent value. | |
620 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any" | |
621 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w") | |
622 (unspec:SVE_FULL_I | |
623 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl") | |
624 (unspec:SVE_FULL_I | |
625 [(match_operand 5) | |
626 (unspec:SVE_FULL_I | |
627 [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w") | |
628 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")] | |
629 SVE2_COND_INT_SHIFT)] | |
630 UNSPEC_PRED_X) | |
631 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")] | |
632 UNSPEC_SEL))] | |
633 "TARGET_SVE2 | |
634 && !rtx_equal_p (operands[2], operands[4]) | |
635 && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))" | |
636 "@ | |
637 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
638 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
639 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
640 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
641 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
642 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
643 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
644 # | |
645 #" | |
646 "&& 1" | |
647 { | |
648 if (reload_completed | |
649 && register_operand (operands[4], <MODE>mode) | |
650 && !rtx_equal_p (operands[0], operands[4])) | |
651 { | |
652 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
653 operands[4], operands[1])); | |
654 operands[4] = operands[2] = operands[0]; | |
655 } | |
656 else if (!CONSTANT_P (operands[5])) | |
657 operands[5] = CONSTM1_RTX (<VPRED>mode); | |
658 else | |
659 FAIL; | |
660 } | |
661 [(set_attr "movprfx" "yes")] | |
662 ) | |
663 | |
664 ;; ========================================================================= | |
665 ;; == Uniform ternary arithmetic | |
666 ;; ========================================================================= | |
667 | |
668 ;; ------------------------------------------------------------------------- | |
669 ;; ---- [INT] General ternary arithmetic that maps to unspecs | |
670 ;; ------------------------------------------------------------------------- | |
671 ;; Includes: | |
672 ;; - ADCLB | |
673 ;; - ADCLT | |
674 ;; - EORBT | |
675 ;; - EORTB | |
676 ;; - SBCLB | |
677 ;; - SBCLT | |
678 ;; - SQRDMLAH | |
679 ;; - SQRDMLSH | |
680 ;; ------------------------------------------------------------------------- | |
681 | |
682 (define_insn "@aarch64_sve_<sve_int_op><mode>" | |
683 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") | |
684 (unspec:SVE_FULL_I | |
685 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") | |
686 (match_operand:SVE_FULL_I 3 "register_operand" "w, w") | |
687 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")] | |
688 SVE2_INT_TERNARY))] | |
689 "TARGET_SVE2" | |
690 "@ | |
691 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> | |
692 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
693 ) | |
694 | |
695 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>" | |
696 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") | |
697 (unspec:SVE_FULL_HSDI | |
698 [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w") | |
699 (unspec:SVE_FULL_HSDI | |
700 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
701 (match_operand:SI 4 "const_int_operand")] | |
702 UNSPEC_SVE_LANE_SELECT) | |
703 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")] | |
704 SVE2_INT_TERNARY_LANE))] | |
705 "TARGET_SVE2" | |
706 "@ | |
707 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4] | |
708 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]" | |
709 ) | |
710 | |
711 ;; ------------------------------------------------------------------------- | |
712 ;; ---- [INT] Multiply-and-accumulate operations | |
713 ;; ------------------------------------------------------------------------- | |
714 ;; Includes the lane forms of: | |
715 ;; - MLA | |
716 ;; - MLS | |
717 ;; ------------------------------------------------------------------------- | |
718 | |
719 (define_insn "@aarch64_sve_add_mul_lane_<mode>" | |
720 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") | |
721 (plus:SVE_FULL_HSDI | |
722 (mult:SVE_FULL_HSDI | |
723 (unspec:SVE_FULL_HSDI | |
724 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
725 (match_operand:SI 4 "const_int_operand")] | |
726 UNSPEC_SVE_LANE_SELECT) | |
727 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")) | |
728 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] | |
729 "TARGET_SVE2" | |
730 "@ | |
731 mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4] | |
732 movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]" | |
733 [(set_attr "movprfx" "*,yes")] | |
734 ) | |
735 | |
736 (define_insn "@aarch64_sve_sub_mul_lane_<mode>" | |
737 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") | |
738 (minus:SVE_FULL_HSDI | |
739 (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") | |
740 (mult:SVE_FULL_HSDI | |
741 (unspec:SVE_FULL_HSDI | |
742 [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
743 (match_operand:SI 4 "const_int_operand")] | |
744 UNSPEC_SVE_LANE_SELECT) | |
745 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))] | |
746 "TARGET_SVE2" | |
747 "@ | |
748 mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4] | |
749 movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]" | |
750 [(set_attr "movprfx" "*,yes")] | |
751 ) | |
752 | |
753 ;; ------------------------------------------------------------------------- | |
754 ;; ---- [INT] Binary logic operations with rotation | |
755 ;; ------------------------------------------------------------------------- | |
756 ;; Includes: | |
757 ;; - XAR | |
758 ;; ------------------------------------------------------------------------- | |
759 | |
;; XOR of operands 1 and 2, rotated right by the immediate in operand 3.
;; The "%0" constraint commutes the XOR so either input can be tied to
;; the destination.
(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (rotatert:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
  "TARGET_SVE2"
  "@
   xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
773 | |
774 ;; ------------------------------------------------------------------------- | |
775 ;; ---- [INT] Ternary logic operations | |
776 ;; ------------------------------------------------------------------------- | |
777 ;; Includes: | |
778 ;; - BCAX | |
779 ;; - BSL | |
780 ;; - BSL1N | |
781 ;; - BSL2N | |
782 ;; - EOR3 | |
783 ;; - NBSL | |
784 ;; ------------------------------------------------------------------------- | |
785 | |
;; Unpredicated exclusive OR of AND: operand 0 = (operand 2 & operand 3)
;; ^ operand 1.  BCAX always operates on .d; the element size is
;; irrelevant for pure bitwise operations.
(define_insn "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   bcax\t%0.d, %0.d, %2.d, %3.d
   movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated 3-way exclusive OR.  The XOR is fully commutative, so
;; the first three alternatives allow any one of the three inputs to be
;; tied to the destination.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
        (xor:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
          (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
  "TARGET_SVE2"
  "@
   eor3\t%0.d, %0.d, %2.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %2.d
   movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,*,*,yes")]
)
817 | |
;; Use NBSL for vector NOR.  Operand 3 is the governing predicate of the
;; UNSPEC_PRED_X wrapper; once matched, the rewrite replaces it with an
;; all-true constant, since the NBSL encoding is unpredicated.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (and:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %0.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Use NBSL for vector NAND, handled the same way as NOR above (note the
;; selector operand here is %2 rather than %0).
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (ior:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %2.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
861 | |
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand"))
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
)

;; Matched form of the above.  NOTE(review): BSL_DUP and the
;; <bsl_mov>/<bsl_dup>/<bsl_1st>/<bsl_2nd> attributes appear to iterate
;; over which of operands 1 and 2 is duplicated (resp. tied to the
;; destination) — confirm against the iterator definitions.
(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
              (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)
891 | |
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
;; The NOT needs an UNSPEC_PRED_X wrapper (predicate operand 4, set to
;; all-true here) because SVE has no unpredicated vector NOT.
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand"))
                 (match_operand:SVE_FULL_I 3 "register_operand"))
               (match_dup 2)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matched form of the above; the rewrite canonicalizes the (unused)
;; predicate operand 4 to all-true.
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (match_dup BSL_DUP)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
936 | |
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
;; As for NBSL, the inner NOT needs an UNSPEC_PRED_X wrapper whose
;; predicate (operand 4) is simply all-true.
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matched form of the above; the rewrite canonicalizes the (unused)
;; predicate operand 4 to all-true.
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
981 | |
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
;; The AND of the two NOTs needs an UNSPEC_PRED_X wrapper (predicate
;; operand 4, all-true) since there is no unpredicated vector NOT.
(define_expand "@aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand"))
               (not:SVE_FULL_I
                 (match_dup 3)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matched form of the above; the rewrite canonicalizes the (unused)
;; predicate operand 4 to all-true.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (not:SVE_FULL_I
                 (match_dup BSL_DUP)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
;; Same as the previous pattern but with the two NOT operands of the
;; inner AND swapped, to catch the other canonical ordering.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_dup BSL_DUP))
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
1055 | |
1056 ;; ------------------------------------------------------------------------- | |
1057 ;; ---- [INT] Shift-and-accumulate operations | |
1058 ;; ------------------------------------------------------------------------- | |
1059 ;; Includes: | |
1060 ;; - SRSRA | |
1061 ;; - SSRA | |
1062 ;; - URSRA | |
1063 ;; - USRA | |
1064 ;; ------------------------------------------------------------------------- | |
1065 | |
;; Provide the natural unpredicated interface for SSRA and USRA:
;; operand 2 shifted right by immediate operand 3, accumulated into
;; operand 1.  The shift is wrapped in UNSPEC_PRED_X with an all-true
;; predicate (operand 4) because plain right shifts are predicated in SVE.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed; the rewrite canonicalizes operand 4 to all-true.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; SRSRA and URSRA: rounding shift-right-and-accumulate, represented
;; directly via the VRSHR_N unspecs.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
             (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
            VRSHR_N)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
1121 | |
1122 ;; ------------------------------------------------------------------------- | |
1123 ;; ---- [INT] Shift-and-insert operations | |
1124 ;; ------------------------------------------------------------------------- | |
1125 ;; Includes: | |
1126 ;; - SLI | |
1127 ;; - SRI | |
1128 ;; ------------------------------------------------------------------------- | |
1129 | |
;; These instructions do not take MOVPRFX.  Operand 1 is tied to the
;; destination (constraint "0"): the result depends on the previous
;; contents of operand 0, so a constructive form is not possible.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")
           (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
          SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)
1141 | |
1142 ;; ------------------------------------------------------------------------- | |
1143 ;; ---- [INT] Sum of absolute differences | |
1144 ;; ------------------------------------------------------------------------- | |
1145 ;; Includes: | |
1146 ;; - SABA | |
1147 ;; - UABA | |
1148 ;; ------------------------------------------------------------------------- | |
1149 | |
;; Provide the natural unpredicated interface for SABA and UABA:
;; the absolute difference of operands 2 and 3 is expressed as
;; max (2, 3) - min (2, 3), accumulated into operand 1.  Both min and
;; max are wrapped in UNSPEC_PRED_X with an all-true predicate
;; (operand 4).
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates (operands 4 and 5) aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_operand 5)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1199 | |
1200 ;; ========================================================================= | |
1201 ;; == Extending arithmetic | |
1202 ;; ========================================================================= | |
1203 | |
1204 ;; ------------------------------------------------------------------------- | |
1205 ;; ---- [INT] Wide binary arithmetic | |
1206 ;; ------------------------------------------------------------------------- | |
1207 ;; Includes: | |
1208 ;; - SADDWB | |
1209 ;; - SADDWT | |
1210 ;; - SSUBWB | |
1211 ;; - SSUBWT | |
1212 ;; - UADDWB | |
1213 ;; - UADDWT | |
1214 ;; - USUBWB | |
1215 ;; - USUBWT | |
1216 ;; ------------------------------------------------------------------------- | |
1217 | |
;; Wide binary arithmetic: full-width operand 1 combined with the
;; narrower elements of operand 2 (<Ventype>).
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)
1227 | |
1228 ;; ------------------------------------------------------------------------- | |
1229 ;; ---- [INT] Long binary arithmetic | |
1230 ;; ------------------------------------------------------------------------- | |
1231 ;; Includes: | |
1232 ;; - SABDLB | |
1233 ;; - SABDLT | |
1234 ;; - SADDLB | |
1235 ;; - SADDLBT | |
1236 ;; - SADDLT | |
1237 ;; - SMULLB | |
1238 ;; - SMULLT | |
1239 ;; - SQDMULLB | |
1240 ;; - SQDMULLT | |
1241 ;; - SSUBLB | |
1242 ;; - SSUBLBT | |
1243 ;; - SSUBLT | |
1244 ;; - SSUBLTB | |
1245 ;; - UABDLB | |
1246 ;; - UABDLT | |
1247 ;; - UADDLB | |
1248 ;; - UADDLT | |
1249 ;; - UMULLB | |
1250 ;; - UMULLT | |
1251 ;; - USUBLB | |
1252 ;; - USUBLT | |
1253 ;; ------------------------------------------------------------------------- | |
1254 | |
;; Long binary arithmetic: both inputs have the narrow element type
;; (<Ventype>), the result the wide one (<Vetype>).
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; As above, but with the second input taken from a selected lane of
;; operand 2 (operand 3 is the lane index).  Lane forms only exist for
;; .s and .d results, hence SVE_FULL_SDI.
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (unspec:<VNARROW>
             [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)
1277 | |
1278 ;; ------------------------------------------------------------------------- | |
1279 ;; ---- [INT] Long left shifts | |
1280 ;; ------------------------------------------------------------------------- | |
1281 ;; Includes: | |
1282 ;; - SSHLLB | |
1283 ;; - SSHLLT | |
1284 ;; - USHLLB | |
1285 ;; - USHLLT | |
1286 ;; ------------------------------------------------------------------------- | |
1287 | |
;; Long left shift by immediate: narrow input, wide result.
;; The immediate range is enforced before generating the instruction,
;; hence the unconstrained const_int_operand here.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:DI 2 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)
1298 | |
1299 ;; ------------------------------------------------------------------------- | |
1300 ;; ---- [INT] Long binary arithmetic with accumulation | |
1301 ;; ------------------------------------------------------------------------- | |
1302 ;; Includes: | |
1303 ;; - SABALB | |
1304 ;; - SABALT | |
1305 ;; - SMLALB | |
1306 ;; - SMLALT | |
1307 ;; - SMLSLB | |
1308 ;; - SMLSLT | |
1309 ;; - SQDMLALB | |
1310 ;; - SQDMLALBT | |
1311 ;; - SQDMLALT | |
1312 ;; - SQDMLSLB | |
1313 ;; - SQDMLSLBT | |
1314 ;; - SQDMLSLT | |
1315 ;; - UABALB | |
1316 ;; - UABALT | |
1317 ;; - UMLALB | |
1318 ;; - UMLALT | |
1319 ;; - UMLSLB | |
1320 ;; - UMLSLT | |
1321 ;; ------------------------------------------------------------------------- | |
1322 | |
;; Non-saturating MLA operations: long binary operation on narrow
;; operands 2 and 3, added to wide accumulator operand 1.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_ADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLA operations with lane select (operand 4 is the
;; lane index into operand 3).
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_ADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations (ss_plus accumulation).
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)
1392 | |
;; Non-saturating MLS operations: long binary operation on narrow
;; operands 2 and 3, subtracted from wide accumulator operand 1.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select (operand 4 is the
;; lane index into operand 3).
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations (ss_minus accumulation).
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

1462 ;; ------------------------------------------------------------------------- | |
1463 ;; ---- [FP] Long multiplication with accumulation | |
1464 ;; ------------------------------------------------------------------------- | |
1465 ;; Includes: | |
1466 ;; - FMLALB | |
1467 ;; - FMLALT | |
1468 ;; - FMLSLB | |
1469 ;; - FMLSLT | |
1470 ;; ------------------------------------------------------------------------- | |
1471 | |
1472 (define_insn "@aarch64_sve_<sve_fp_op><mode>" | |
1473 [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") | |
1474 (unspec:VNx4SF_ONLY | |
1475 [(match_operand:<VNARROW> 1 "register_operand" "w, w") | |
1476 (match_operand:<VNARROW> 2 "register_operand" "w, w") | |
1477 (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")] | |
1478 SVE2_FP_TERNARY_LONG))] | |
1479 "TARGET_SVE2" | |
1480 "@ | |
1481 <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype> | |
1482 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>" | |
1483 [(set_attr "movprfx" "*,yes")] | |
1484 ) | |
1485 | |
1486 (define_insn "@aarch64_<sve_fp_op>_lane_<mode>" | |
1487 [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") | |
1488 (unspec:VNx4SF_ONLY | |
1489 [(match_operand:<VNARROW> 1 "register_operand" "w, w") | |
1490 (unspec:<VNARROW> | |
1491 [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
1492 (match_operand:SI 3 "const_int_operand")] | |
1493 UNSPEC_SVE_LANE_SELECT) | |
1494 (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")] | |
1495 SVE2_FP_TERNARY_LONG_LANE))] | |
1496 "TARGET_SVE2" | |
1497 "@ | |
1498 <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3] | |
1499 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]" | |
1500 [(set_attr "movprfx" "*,yes")] | |
1501 ) | |
1502 | |
1503 ;; ========================================================================= | |
;; == Narrowing arithmetic
1505 ;; ========================================================================= | |
1506 | |
1507 ;; ------------------------------------------------------------------------- | |
1508 ;; ---- [INT] Narrowing unary arithmetic | |
1509 ;; ------------------------------------------------------------------------- | |
1510 ;; Includes: | |
1511 ;; - SQXTNB | |
1512 ;; - SQXTNT | |
1513 ;; - SQXTUNB | |
1514 ;; - SQXTUNT | |
1515 ;; - UQXTNB | |
1516 ;; - UQXTNT | |
1517 ;; ------------------------------------------------------------------------- | |
1518 | |
;; Narrowing unary arithmetic, bottom form (writes the even (B) elements
;; of the narrower result).
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; Top (T) form.  These instructions do not take MOVPRFX: operand 1 is
;; tied to the destination (constraint "0"), since the other half of
;; the result register is preserved.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
1538 | |
1539 ;; ------------------------------------------------------------------------- | |
1540 ;; ---- [INT] Narrowing binary arithmetic | |
1541 ;; ------------------------------------------------------------------------- | |
1542 ;; Includes: | |
1543 ;; - ADDHNB | |
1544 ;; - ADDHNT | |
1545 ;; - RADDHNB | |
1546 ;; - RADDHNT | |
1547 ;; - RSUBHNB | |
1548 ;; - RSUBHNT | |
1549 ;; - SUBHNB | |
1550 ;; - SUBHNT | |
1551 ;; ------------------------------------------------------------------------- | |
1552 | |
;; Narrowing binary arithmetic, bottom (B) form.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; Top (T) form.  These instructions do not take MOVPRFX: operand 1 is
;; tied to the destination (constraint "0"), since the other half of
;; the result register is preserved.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)
1574 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - RSHRNB
;; - RSHRNT
;; - SHRNB
;; - SHRNT
;; - SQRSHRNB
;; - SQRSHRNT
;; - SQRSHRUNB
;; - SQRSHRUNT
;; - SQSHRNB
;; - SQSHRNT
;; - SQSHRUNB
;; - SQSHRUNT
;; - UQRSHRNB
;; - UQRSHRNT
;; - UQSHRNB
;; - UQSHRNT
;; -------------------------------------------------------------------------

;; Narrowing bottom right shift by immediate.
;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; Narrowing top right shift by immediate; operand 1 supplies the bottom
;; elements of the destination and is tied to operand 0.
;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:DI 3 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)
1620 | |
;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDP
;; - SMAXP
;; - SMINP
;; - UMAXP
;; - UMINP
;; -------------------------------------------------------------------------

;; Predicated pairwise integer operations.  The second alternative uses
;; MOVPRFX to free operand 0 from the tie to operand 2.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1649 | |
;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDP
;; - FMAXP
;; - FMAXNMP
;; - FMINP
;; - FMINNMP
;; -------------------------------------------------------------------------

;; Predicated pairwise floating-point operations.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1674 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SADALP
;; - UADALP
;; -------------------------------------------------------------------------

;; Predicated pairwise addition and accumulate long, with merging.
;; (SADALP/UADALP add adjacent pairs of narrow elements and accumulate
;; the widened sums into the destination, per the section above.)
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")
	      (match_operand:<VNARROW> 3 "register_operand")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
{
  /* Only target code is aware of these operations, so we don't need
     to handle the fully-general case.  */
  gcc_assert (rtx_equal_p (operands[2], operands[4])
	      || CONSTANT_P (operands[4]));
})
1702 | |
;; Predicated pairwise addition and accumulate long, merging with the
;; accumulator (the first input).
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  ;; Once the predicate no longer matters, canonicalize operand 4 to an
  ;; all-true predicate so that equivalent insns can be CSEd.
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
1726 | |
;; Predicated pairwise addition and accumulate long, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 5)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)
1749 | |
;; =========================================================================
;; == Complex arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Complex binary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CADD
;; - SQCADD
;; -------------------------------------------------------------------------

;; Complex integer add with rotation; the rotation (#90 or #270) is
;; encoded in the unspec via <rot>.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
	  SVE2_INT_CADD))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
1774 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CMLA
;; - SQRDCMLA
;; -------------------------------------------------------------------------

;; Complex integer multiply-add with rotation, accumulating into operand 1.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane form: operand 3 is indexed by the constant in operand 4, with the
;; lane selection wrapped in UNSPEC_SVE_LANE_SELECT.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSI
	  [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSI
	     [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
1813 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------

;; Complex dot product: the multiplicands are a quarter of the width of
;; the accumulator (VSI2QI), hence the <Vetype_fourth> suffixes.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane form of the complex dot product above.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (unspec:<VSI2QI>
	     [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
1851 | |
;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------

;; Predicated convert long top.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_COND_FP_UNARY_LONG))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)
1874 | |
;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VNARROW> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)
1889 | |
;; Predicated convert long top, merging with operand 3 (tied to the
;; destination).  These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (match_operand:<VNARROW> 2 "register_operand" "w")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
  ;; Canonicalize the inner predicate to match the outer one once known.
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)
1909 | |
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------

;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
	   (match_operand:<VWIDE> 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE2"
  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)
1935 | |
;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VWIDE> 2 "register_operand" "w")]
	  SVE2_COND_FP_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
1948 | |
;; Predicated FCVTX with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand")
	   (unspec:VNx4SF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VWIDE> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)
1963 | |
;; Predicated FCVTX, merging with the destination (alt 0), zero (alt 1)
;; or an independent value (alt 2).
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SF_ONLY
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[3])
   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  ;; Canonicalize the inner predicate to match the outer one once known.
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
1988 | |
;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTXNT))]
  "TARGET_SVE2"
  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)
2005 | |
;; =========================================================================
;; == Other arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
	     SVE2_U32_UNARY)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
2030 | |
;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
{
  /* NOTE(review): the *cond_* insn below rewrites this operand to
     CONSTM1_RTX (<VPRED>mode); using <MODE>mode here looks inconsistent —
     confirm against upstream before changing.  */
  operands[4] = CONSTM1_RTX (<MODE>mode);
}
)
2049 | |
;; Predicated integer unary operations, merging with the destination
;; (alt 0), zero (alt 1) or an independent value (alt 2).
(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  ;; The inner predicate is dead here; canonicalize it to all-true.
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
2073 | |
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------

;; Predicated FLOGB: FP input, integer result of equal element width.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	  SVE2_COND_INT_UNARY_FP))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
2092 | |
;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:<V_INT_EQUIV>
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)
2107 | |
;; Predicated FLOGB, merging with the destination (alt 0), zero (alt 1)
;; or an independent value (alt 2).
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:<V_INT_EQUIV>
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[3])
   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  ;; Canonicalize the inner predicate to match the outer one once known.
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
2132 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

;; Uniform PMUL.
(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)
2164 | |
;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
	(unspec:SVE2_PMULL_PAIR_I
	  [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
	   (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
	  SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)
2177 | |
;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors (operand 1 has double-vector mode).
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VDOUBLE> 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX: out-of-range indices keep the corresponding element of operand 1,
;; which is tied to the destination.  These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_tbx<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0")
	   (match_operand:SVE_FULL 2 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
	  UNSPEC_TBX))]
  "TARGET_SVE2"
  "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
2212 | |
;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

;; Unpredicated bit-permute operations; gated on the separate
;; TARGET_SVE2_BITPERM extension rather than plain TARGET_SVE2.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")]
	  SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
2231 | |
;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:VNx16BI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2"
{
  /* Use the widest predicate mode we can.  Operand 4 is the guaranteed
     alignment of the accesses, capped at 8 bytes.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  */
  emit_insn (gen_while_ptest
	     (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
	      gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
	      CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})
2276 | |
;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

;; Predicated (zeroing) histogram count for .S/.D elements.
(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
	  UNSPEC_HISTCNT))]
  "TARGET_SVE2"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated byte histogram over 64-bit segments.
(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_HISTSEG))]
  "TARGET_SVE2"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
2305 | |
2306 ;; ------------------------------------------------------------------------- | |
2307 ;; ---- String matching | |
2308 ;; ------------------------------------------------------------------------- | |
2309 ;; Includes: | |
2310 ;; - MATCH | |
2311 ;; - NMATCH | |
2312 ;; ------------------------------------------------------------------------- | |
2313 | |
;; Predicated string matching (MATCH/NMATCH via the SVE2_MATCH iterator),
;; operating on byte or halfword elements (SVE_FULL_BHI).  Operand 1 is the
;; governing predicate (zeroing) and operand 2 is its aarch64_sve_ptrue_flag,
;; the usual UNSPEC_PRED_Z wrapping.  The instruction also sets the condition
;; codes, which this pattern models only as a clobber; the *_cc and *_ptest
;; variants below match the cases where the flags result is used.
2314 ;; Predicated string matching. | |
2315 (define_insn "@aarch64_pred_<sve_int_op><mode>" | |
2316 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
2317 (unspec:<VPRED> | |
2318 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2319 (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
2320 (unspec:<VPRED> | |
2321 [(match_operand:SVE_FULL_BHI 3 "register_operand" "w") | |
2322 (match_operand:SVE_FULL_BHI 4 "register_operand" "w")] | |
2323 SVE2_MATCH)] | |
2324 UNSPEC_PRED_Z)) | |
2325 (clobber (reg:CC_NZC CC_REGNUM))] | |
2326 "TARGET_SVE2" | |
2327 "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" | |
2328 ) | |
2329 | |
;; Combined MATCH/NMATCH + PTEST: a single instruction produces both the
;; predicate result (second set) and the flags result (first set, modelled
;; as UNSPEC_PTEST over the UNSPEC_PRED_Z match).  Operands 4/5 describe the
;; PTEST governing predicate and operands 6/7 the match's own governing
;; predicate; the insn condition requires them to be compatible for PTEST
;; purposes (aarch64_sve_same_pred_for_ptest_p).  The rewrite step then
;; canonicalizes operands 6/7 to be copies of 4/5 once they are known to be
;; equivalent, so later passes see a single predicate.
2330 ;; Predicated string matching in which both the flag and predicate results | |
2331 ;; are interesting. | |
2332 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc" | |
2333 [(set (reg:CC_NZC CC_REGNUM) | |
2334 (unspec:CC_NZC | |
2335 [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
2336 (match_operand 4) | |
2337 (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
2338 (unspec:<VPRED> | |
2339 [(match_operand 6) | |
2340 (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
2341 (unspec:<VPRED> | |
2342 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") | |
2343 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] | |
2344 SVE2_MATCH)] | |
2345 UNSPEC_PRED_Z)] | |
2346 UNSPEC_PTEST)) | |
2347 (set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
2348 (unspec:<VPRED> | |
2349 [(match_dup 6) | |
2350 (match_dup 7) | |
2351 (unspec:<VPRED> | |
2352 [(match_dup 2) | |
2353 (match_dup 3)] | |
2354 SVE2_MATCH)] | |
2355 UNSPEC_PRED_Z))] | |
2356 "TARGET_SVE2 | |
2357 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
2358 "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
2359 "&& !rtx_equal_p (operands[4], operands[6])" | |
2360 { | |
2361 operands[6] = copy_rtx (operands[4]); | |
2362 operands[7] = operands[5]; | |
2363 } | |
2364 ) | |
2365 | |
;; MATCH/NMATCH used purely for its flags result: same RTL shape as the
;; *_cc pattern above, but the predicate destination is dead, so it is
;; modelled as a scratch clobber and the only live set is the CC register.
;; The rewrite step canonicalizes the two predicate descriptions (operands
;; 6/7 onto 4/5) once aarch64_sve_same_pred_for_ptest_p has proved them
;; compatible, exactly as in the *_cc variant.
2366 ;; Predicated string matching in which only the flags result is interesting. | |
2367 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest" | |
2368 [(set (reg:CC_NZC CC_REGNUM) | |
2369 (unspec:CC_NZC | |
2370 [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
2371 (match_operand 4) | |
2372 (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
2373 (unspec:<VPRED> | |
2374 [(match_operand 6) | |
2375 (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
2376 (unspec:<VPRED> | |
2377 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") | |
2378 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] | |
2379 SVE2_MATCH)] | |
2380 UNSPEC_PRED_Z)] | |
2381 UNSPEC_PTEST)) | |
2382 (clobber (match_scratch:<VPRED> 0 "=Upa"))] | |
2383 "TARGET_SVE2 | |
2384 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
2385 "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
2386 "&& !rtx_equal_p (operands[4], operands[6])" | |
2387 { | |
2388 operands[6] = copy_rtx (operands[4]); | |
2389 operands[7] = operands[5]; | |
2390 } | |
2391 ) | |
2392 | |
2393 ;; ========================================================================= | |
2394 ;; == Cryptographic extensions | |
2395 ;; ========================================================================= | |
2396 | |
2397 ;; ------------------------------------------------------------------------- | |
2398 ;; ---- Optional AES extensions | |
2399 ;; ------------------------------------------------------------------------- | |
2400 ;; Includes: | |
2401 ;; - AESD | |
2402 ;; - AESE | |
2403 ;; - AESIMC | |
2404 ;; - AESMC | |
2405 ;; ------------------------------------------------------------------------- | |
2406 | |
;; SVE2 AESD and AESE (selected by the CRYPTO_AES iterator and <aes_op>).
;; The instruction XORs its two byte-vector inputs before the AES round,
;; which is modelled explicitly with the commutative XOR inside the unspec
;; ("%0" ties operand 1 to the destination; "%" lets either input take
;; the tied slot).  The asm therefore names %0 twice and %2 once.
2407 ;; AESD and AESE. | |
2408 (define_insn "aarch64_sve2_aes<aes_op>" | |
2409 [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
2410 (unspec:VNx16QI | |
2411 [(xor:VNx16QI | |
2412 (match_operand:VNx16QI 1 "register_operand" "%0") | |
2413 (match_operand:VNx16QI 2 "register_operand" "w"))] | |
2414 CRYPTO_AES))] | |
2415 "TARGET_SVE2_AES" | |
2416 "aes<aes_op>\t%0.b, %0.b, %2.b" | |
2417 [(set_attr "type" "crypto_aese")] | |
2418 ) | |
2419 | |
;; SVE2 AESMC and AESIMC (mix-columns and its inverse, via CRYPTO_AESMC).
;; Destructive single-source form: the "0" constraint ties the input to the
;; destination, and since the instructions do not take MOVPRFX the register
;; allocator must honour the tie directly.
2420 ;; AESMC and AESIMC. These instructions do not take MOVPRFX. | |
2421 (define_insn "aarch64_sve2_aes<aesmc_op>" | |
2422 [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
2423 (unspec:VNx16QI | |
2424 [(match_operand:VNx16QI 1 "register_operand" "0")] | |
2425 CRYPTO_AESMC))] | |
2426 "TARGET_SVE2_AES" | |
2427 "aes<aesmc_op>\t%0.b, %0.b" | |
2428 [(set_attr "type" "crypto_aesmc")] | |
2429 ) | |
2430 | |
2431 ;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want | |
2432 ;; to keep the two together and enforce the register dependency without | |
2433 ;; scheduling or register allocation messing up the order or introducing | |
2434 ;; moves in between. Mash the two together during combine. | |
2435 | |
;; Fused AESE+AESMC pair, formed by combine when AARCH64_FUSE_AES_AESMC
;; fusion is enabled: matching the nested unspecs as one insn keeps the
;; two instructions adjacent and sharing a register, so the scheduler and
;; register allocator cannot break the fusion opportunity.  Emits both
;; instructions from one template, hence "length" 8 (two 4-byte insns).
2436 (define_insn "*aarch64_sve2_aese_fused" | |
2437 [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
2438 (unspec:VNx16QI | |
2439 [(unspec:VNx16QI | |
2440 [(xor:VNx16QI | |
2441 (match_operand:VNx16QI 1 "register_operand" "%0") | |
2442 (match_operand:VNx16QI 2 "register_operand" "w"))] | |
2443 UNSPEC_AESE)] | |
2444 UNSPEC_AESMC))] | |
2445 "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" | |
2446 "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b" | |
2447 [(set_attr "type" "crypto_aese") | |
2448 (set_attr "length" "8")] | |
2449 ) | |
2450 | |
;; Fused AESD+AESIMC pair -- the decrypt-direction mirror of the
;; *aarch64_sve2_aese_fused pattern above, with the same motivation:
;; match the combined unspecs as a single 8-byte insn so scheduling and
;; register allocation preserve the back-to-back fusable sequence.
2451 (define_insn "*aarch64_sve2_aesd_fused" | |
2452 [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
2453 (unspec:VNx16QI | |
2454 [(unspec:VNx16QI | |
2455 [(xor:VNx16QI | |
2456 (match_operand:VNx16QI 1 "register_operand" "%0") | |
2457 (match_operand:VNx16QI 2 "register_operand" "w"))] | |
2458 UNSPEC_AESD)] | |
2459 UNSPEC_AESIMC)] | |
2460 "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" | |
2461 "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b" | |
2462 [(set_attr "type" "crypto_aese") | |
2463 (set_attr "length" "8")] | |
2464 ) | |
2465 | |
2466 ;; ------------------------------------------------------------------------- | |
2467 ;; ---- Optional SHA-3 extensions | |
2468 ;; ------------------------------------------------------------------------- | |
2469 ;; Includes: | |
2470 ;; - RAX1 | |
2471 ;; ------------------------------------------------------------------------- | |
2472 | |
;; SVE2-SHA3 RAX1: destination = operand 1 XOR (operand 2 rotated left
;; by 1), on 64-bit doubleword elements.  Modelled with generic XOR and
;; ROTATE rather than an unspec, so combine can form it from open-coded
;; rotate-and-xor sequences; note the RTL operand order (rotated input is
;; operand 2) matches the instruction's second source.
2473 (define_insn "aarch64_sve2_rax1" | |
2474 [(set (match_operand:VNx2DI 0 "register_operand" "=w") | |
2475 (xor:VNx2DI | |
2476 (rotate:VNx2DI | |
2477 (match_operand:VNx2DI 2 "register_operand" "w") | |
2478 (const_int 1)) | |
2479 (match_operand:VNx2DI 1 "register_operand" "w")))] | |
2480 "TARGET_SVE2_SHA3" | |
2481 "rax1\t%0.d, %1.d, %2.d" | |
2482 [(set_attr "type" "crypto_sha3")] | |
2483 ) | |
2484 | |
2485 ;; ------------------------------------------------------------------------- | |
2486 ;; ---- Optional SM4 extensions | |
2487 ;; ------------------------------------------------------------------------- | |
2488 ;; Includes: | |
2489 ;; - SM4E | |
2490 ;; - SM4EKEY | |
2491 ;; ------------------------------------------------------------------------- | |
2492 | |
;; SVE2-SM4 SM4E on 32-bit elements.  Destructive: operand 1 is tied to
;; the destination via the "0" constraint, and since SM4E does not take
;; MOVPRFX the tie cannot be satisfied with a prefix move.
2493 ;; These instructions do not take MOVPRFX. | |
2494 (define_insn "aarch64_sve2_sm4e" | |
2495 [(set (match_operand:VNx4SI 0 "register_operand" "=w") | |
2496 (unspec:VNx4SI | |
2497 [(match_operand:VNx4SI 1 "register_operand" "0") | |
2498 (match_operand:VNx4SI 2 "register_operand" "w")] | |
2499 UNSPEC_SM4E))] | |
2500 "TARGET_SVE2_SM4" | |
2501 "sm4e\t%0.s, %0.s, %2.s" | |
2502 [(set_attr "type" "crypto_sm4")] | |
2503 ) | |
2504 | |
;; SVE2-SM4 SM4EKEY key-schedule step on 32-bit elements.  Unlike SM4E
;; above this is non-destructive: both sources use plain "w" constraints
;; and the destination is independent.
2505 (define_insn "aarch64_sve2_sm4ekey" | |
2506 [(set (match_operand:VNx4SI 0 "register_operand" "=w") | |
2507 (unspec:VNx4SI | |
2508 [(match_operand:VNx4SI 1 "register_operand" "w") | |
2509 (match_operand:VNx4SI 2 "register_operand" "w")] | |
2510 UNSPEC_SM4EKEY))] | |
2511 "TARGET_SVE2_SM4" | |
2512 "sm4ekey\t%0.s, %1.s, %2.s" | |
2513 [(set_attr "type" "crypto_sm4")] | |
2514 ) | |